From 11deb2a39a1dab729e612ddc960d9dd84afc2e8e Mon Sep 17 00:00:00 2001 From: Timor Gruber Date: Fri, 8 May 2026 17:16:37 +0300 Subject: [PATCH 01/13] document uv Python tooling migration Capture the accepted uv migration design before planning so the Python tooling cleanup has a reviewed architectural baseline. --- docs/context/uv-python-tooling-anchor.md | 186 ++++++++ docs/rfcs/RFC-switch-to-uv-python-tooling.md | 432 ++++++++++++++++++ .../reviews/RFC-uv.rfc-architect-reviewer.md | 22 + .../reviews/RFC-uv.rfc-clarity-reviewer.md | 35 ++ docs/rfcs/reviews/RFC-uv.rfc-risk-reviewer.md | 23 + 5 files changed, 698 insertions(+) create mode 100644 docs/context/uv-python-tooling-anchor.md create mode 100644 docs/rfcs/RFC-switch-to-uv-python-tooling.md create mode 100644 docs/rfcs/reviews/RFC-uv.rfc-architect-reviewer.md create mode 100644 docs/rfcs/reviews/RFC-uv.rfc-clarity-reviewer.md create mode 100644 docs/rfcs/reviews/RFC-uv.rfc-risk-reviewer.md diff --git a/docs/context/uv-python-tooling-anchor.md b/docs/context/uv-python-tooling-anchor.md new file mode 100644 index 0000000..a5659bc --- /dev/null +++ b/docs/context/uv-python-tooling-anchor.md @@ -0,0 +1,186 @@ +# uv Python Tooling Anchor + +## Intent + +Decide whether and how this dotfiles repo should move Python tooling from +`pip`, `pipx`, `poetry`, and possibly `pyenv` toward `uv`, with the goal of +reducing shell complexity while preserving portable machine setup behavior. + +## Quick Summary + +| Settled Direction | What It Means | +|-------------------|---------------| +| RFC accepted. | `docs/rfcs/RFC-switch-to-uv-python-tooling.md` R4 is the accepted design baseline. | +| Current repo treats Python mostly as shell runtime state. | Existing integration is in `dot_zshenv.tmpl`, `dot_zshrc.tmpl`, and `docs/processes/shell-startup.md`; the installer does not currently install Python tooling. | +| Directory-sensitive bare `python` selection is not required. 
| This removes the strongest reason to keep pyenv shims and shell integration. | +| `uv` should be a normal optional installer tool. | The installer should offer `uv`, but not preselect or special-case it; adding recommendation tiers is not worth the complexity. | +| Current repo has no Python project payload. | Searches found no `pyproject.toml`, `requirements*.txt`, `poetry.lock`, or `uv.lock`; Python references are shell integration, docs/examples, and packageresolver test fixtures. | +| Avoid a full Python setup subsystem. | `uv` should provide enough Python capability on demand; preinstalling Python versions or migrating old Python state is over-engineering for current repo priorities. | +| Keep useful Python completions. | Preserve `pip`, `poetry`, and guarded `pipx` completion caching; rely on Homebrew-provided `uv`/`uvx` completions in the MVP, with no installer-level zcompdump invalidation. | +| MVP accepts brew-only `uv` installation. | Add `uv` through the existing optional-tool/package-map flow; apt/dnf fallback can be revisited later. | +| No user-state migration. | Existing pyenv/pipx/Poetry state should be left alone; the installer is for initial bootstrapping, not ongoing state keeping. | + +## Current State + +- [x] Project docs and shell startup files were read for current architecture. +- [x] Current Python shell integration was identified: pyenv shims in `.zshenv`, lazy pyenv initialization in `.zshrc`, cached completions for Poetry, pip, and pipx. +- [x] `uv` documentation was checked for managed Python versions, `.python-version`, tools, auto-downloads, and executable behavior. +- [x] User confirmed directory-sensitive bare `python` selection is not important in day-to-day use. +- [x] User confirmed that if `uv` manages Python environments, the project installer should offer it. 
+- [x] Official `uv` installation docs were checked: package-manager installs are supported, and the standalone installer can install into `~/.local/bin` with `UV_NO_MODIFY_PATH=1` to avoid editing shell profiles. +- [x] Current repo usage was re-checked: there is no Python project dependency graph or Python-based installer path to preserve. +- [x] User rejected a complete Python setup subsystem as unnecessary right now. +- [x] User wants to keep `pip`, `poetry`, and guarded `pipx` completions, while adding `uv`/`uvx` completions and removing pyenv-specific shell behavior. +- [x] User decided `uv` should not be preselected or treated as more important than other optional tools. +- [x] Installer support was checked: standalone script installation exists as a pattern for chezmoi/Homebrew, but optional tools are currently package-manager-only. +- [x] User accepted package-manager-only `uv` availability for now, with brew-only support matching current setup reality and existing brew-only optional tools. +- [x] User confirmed no migration of existing pyenv, pipx, or Poetry user state is desired. +- [x] RFC R2 was written and passed architecture, risk, and clarity re-review after preserving guarded pipx completions. +- [x] User identified that brew-installed `uv`/`uvx` already provide completions through Homebrew; explicit uv/uvx completion caching is unnecessary for the brew-only MVP. +- [x] R3 review found completion-cache ambiguity; R4 clarifies stale zcompdump behavior and requires a safer pipx argcomplete guard. +- [x] RFC R4 passed architecture, risk, and clarity review with no concerns. +- [x] User accepted RFC R4. + +## Constraints + +- The repository is a chezmoi source directory; shell behavior changes should be made in source templates, not generated files. +- Shell startup performance matters; current pyenv integration is lazy because eager `pyenv init` was too costly or intrusive. 
+- The installer is the only writer of chezmoi data, but Python tooling is currently not part of the installer data contract. +- Existing design favors progressive enhancement: missing tools should not break shell startup. + +## Decisions + +### Python Version Selection Model + +**Decision:** The migration does not need to preserve pyenv-style automatic +directory-sensitive switching for bare `python` or `pip` commands. + +**Reason:** The feature was added recently but has not been useful in practice; +removing it simplifies shell startup and reduces pyenv-specific hacks. + +**Rejected:** Keep pyenv primarily to preserve automatic `cd project && python` +behavior. + +**Reason rejected:** That behavior is not part of the user's actual workflow. + +**Reconsider if:** A future workflow depends on invoking bare `python` or `pip` +inside project directories without routing through `uv` or an activated virtual +environment. + +### Installer Ownership + +**Decision:** If `uv` becomes the preferred Python environment/tool manager, the +installer should offer it as a normal optional tool, but the migration should +not create a full Python provisioning subsystem or a special recommendation +tier. + +**Reason:** The dotfiles should not depend on a manually installed external tool +for the primary Python workflow. At the same time, the repo has no Python +project payload today, so preinstalling Python versions, migrating existing +pyenv/pipx state, designing a dedicated Python setup flow, or adding optional +tool preselection is unnecessary. + +**Rejected:** Leave `uv` entirely to manual Homebrew/system installation while +still making dotfiles assume `uv` workflows. + +**Reason rejected:** That would create an implicit dependency and weaken the +portable bootstrap goal. + +**Rejected:** Add a recommended/default-selected optional-tool tier for `uv`. 
+ +**Reason rejected:** It would be extra UX and installer complexity for a tool +that should remain optional like the rest of the daily-use CLI tools. + +**Reconsider if:** The repo gains Python project dependencies, the installer +starts requiring Python-managed tools during setup, or machines need a +pre-provisioned Python runtime rather than on-demand `uv` behavior. + +### Shell Completions + +**Decision:** Keep low-cost completion caching for relevant legacy Python tools: +retain `pip`, `poetry`, and guarded `pipx` completions. For the brew-only uv MVP, +rely on Homebrew-provided `uv` and `uvx` completions instead of generating cached +uv/uvx completions in `.zshrc`. + +**Reason:** Completion caching has minimal shell-startup cost and preserves +compatibility with projects that still use Poetry, core `pip` workflows, or an +installed `pipx` command. Homebrew's uv formula already generates uv/uvx +completions, and the shell already adds Homebrew's zsh completion directory to +`fpath`. + +**Decision:** Do not add installer-level `~/.zcompdump` invalidation in this RFC. + +**Reason:** The MVP relies on Homebrew completion files being present and on +normal shell completion cache refresh behavior. Immediate completion freshness +after installer-run `brew install uv` is not important enough to expand scope. + +**Rejected:** Invalidate zsh completion cache after optional-tool installs. + +**Reason rejected:** It is a broader installer behavior change that can be +considered separately if completion freshness becomes important. + +**Rejected:** Remove legacy Python completions as part of the cleanup. + +**Reason rejected:** `pip` remains core Python tooling, and Poetry remains common +enough that encountering existing projects is likely. `pipx` is no longer the +preferred manager, but if it is installed, accurate completions are cheap and +useful. 
+ +**Reconsider if:** Completion generation becomes a measurable startup cost or +the tools are no longer encountered in practice. + +### Optional Tool Availability + +**Decision:** Add `uv` through the existing optional tool/package mapping flow, +with Homebrew support as the initial MVP. Do not add standalone installer +fallback or unsupported-tool warnings for this RFC. + +**Reason:** This matches existing brew-only optional tools (`sheldon`, `eza`, +`difftastic`) and is enough for the user's current setup pattern, where most +machines use Homebrew. + +**Rejected:** Extend optional tools with alternate install methods before adding +`uv`. + +**Reason rejected:** It is more work than the immediate need justifies. + +**Reconsider if:** Non-Homebrew Linux setups become common enough that missing +`uv` materially hurts bootstrap completeness. + +### User State Migration + +**Decision:** Do not migrate, clean up, uninstall, or convert existing `pyenv`, +`pipx`, or Poetry state. + +**Reason:** The installer is used for initial bootstrapping, not as an ongoing +state keeper. Existing state may remain on disk without being managed by the +dotfiles. + +**Rejected:** Convert pyenv Python versions, pipx-installed tools, or Poetry +projects into uv-managed equivalents. + +**Reason rejected:** It would add risk and complexity outside the dotfiles' +current responsibility. + +**Reconsider if:** The installer later becomes a state reconciliation tool or a +dedicated cleanup/migration command is explicitly requested. + +## Open Questions + +No active open questions for the accepted RFC. 
+ +## References + +- `dot_zshenv.tmpl` +- `dot_zshrc.tmpl` +- `docs/processes/shell-startup.md` +- `installer/internal/config/tools.yaml` +- `installer/internal/config/packagemap.yaml` +- `docs/rfcs/RFC-switch-to-uv-python-tooling.md` +- `docs/rfcs/reviews/RFC-uv.rfc-architect-reviewer.md` +- `docs/rfcs/reviews/RFC-uv.rfc-risk-reviewer.md` +- `docs/rfcs/reviews/RFC-uv.rfc-clarity-reviewer.md` +- https://docs.astral.sh/uv/concepts/python-versions/ +- https://docs.astral.sh/uv/concepts/tools/ +- https://docs.astral.sh/uv/getting-started/installation/ +- https://docs.astral.sh/uv/reference/installer/ diff --git a/docs/rfcs/RFC-switch-to-uv-python-tooling.md b/docs/rfcs/RFC-switch-to-uv-python-tooling.md new file mode 100644 index 0000000..2a0539a --- /dev/null +++ b/docs/rfcs/RFC-switch-to-uv-python-tooling.md @@ -0,0 +1,432 @@ +# RFC-0001: Switch To uv For Python Tooling + +Status: Accepted +Revision: R4 +Last Updated: 2026-05-08 + +## Review Record + +| Reviewer | Scope | Status | Notes | +|----------|-------|--------|-------| +| `rfc-architect-reviewer` | Architecture, boundaries, contracts, current-state fit, planning readiness | Passed | `docs/rfcs/reviews/RFC-uv.rfc-architect-reviewer.md`; R4 passed with no concerns. | +| `rfc-risk-reviewer` | Technical risks, migration, compatibility, rollback, hidden complexity | Passed | `docs/rfcs/reviews/RFC-uv.rfc-risk-reviewer.md`; R4 passed with no concerns. | +| `rfc-clarity-reviewer` | Clarity and actionability | Passed | `docs/rfcs/reviews/RFC-uv.rfc-clarity-reviewer.md`; R4 passed with no concerns. | + +## Summary + +This RFC changes the dotfiles' Python tooling posture from pyenv-centered shell +integration to uv-compatible, on-demand Python tooling. 
The repo does not +contain Python project dependencies or a Python-based installer path today, so +the chosen design is intentionally small: offer `uv` as a normal optional +installer tool, remove pyenv-specific shell startup behavior, and keep low-cost +completions for Python tools that are still useful in existing projects. + +The design does not introduce a dedicated Python setup subsystem. It does not +preinstall Python versions, migrate existing `pyenv` or `pipx` state, convert +Poetry projects, or add standalone uv installation fallback for non-Homebrew +systems. The immediate goal is to make uv available where the current optional +tools mechanism supports it, simplify shell startup, and leave future Python +needs to uv's on-demand workflows when uv is installed. No generated shell +behavior may require uv to be present. + +## Problem + +The current shell configuration contains pyenv-specific behavior even though +pyenv's main benefit in this setup, automatic directory-sensitive switching for +bare `python` and `pip`, is not part of the user's actual workflow. This creates +shell complexity without corresponding value: + +- `.zshenv` adds pyenv shims and pyenv's binary directory when `~/.pyenv` + exists. +- `.zshrc` lazily initializes pyenv and pyenv-virtualenv based on directory + markers or Python-related commands. +- A local `pyenv-shell` shim exists to work around pyenv shell-integration + behavior. + +At the same time, uv has become the preferred tool the user wants available for +Python environments, Python-based tools, and ad hoc Python work. The dotfiles +should shift toward uv-compatible shell behavior without over-engineering a full +Python lifecycle manager inside the installer or making shell startup depend on +uv availability. + +## Goals + +- Make `uv` available through the existing installer optional-tools flow where + the active package manager supports it. +- Remove pyenv-specific shell startup behavior from the dotfiles. 
+- Use Homebrew-provided uv/uvx completions when uv is installed by Homebrew. +- Preserve `pip`, `poetry`, and guarded `pipx` completions because they remain + useful for core Python workflows, existing projects, and machines where pipx + is still installed. +- Keep shell startup uv-compatible rather than uv-dependent. +- Keep the migration small, reversible, and aligned with the existing optional + tools architecture. + +## Non-Goals + +- Do not add a dedicated Python setup subsystem to the installer. +- Do not preinstall Python versions with `uv python install`. +- Do not migrate, delete, uninstall, or convert existing `pyenv`, `pipx`, or + Poetry state. +- Do not convert existing Poetry projects to uv projects. +- Do not preserve pyenv-style automatic directory-sensitive switching for bare + `python` or `pip`. +- Do not add optional-tool preselection, recommendation tiers, or special + prompt behavior for `uv`. +- Do not add standalone uv installer fallback for non-Homebrew active package + managers in this RFC. + +## Constraints + +- This repository is the chezmoi source directory; shell behavior changes belong + in source templates such as `dot_zshenv.tmpl` and `dot_zshrc.tmpl`. +- The installer writes chezmoi data, but Python tooling is not part of the + current chezmoi data contract. +- Shell startup should stay fast and progressively skip unavailable tools. +- The existing optional-tools flow is package-manager-backed and non-fatal. +- Existing brew-only optional tools are silently filtered out when the active + package manager is not Homebrew. +- The installer is used for initial bootstrapping, not ongoing state keeping. +- The user has explicitly accepted Homebrew-only uv availability for the MVP + because most target setups use Homebrew. +- The user has explicitly confirmed that directory-sensitive bare `python` + switching is not important in day-to-day use. 
+ +## Current State + +The project has three relevant boundaries: + +- The Go installer bootstraps prerequisites, applies dotfiles, and optionally + installs selected CLI tools. +- Chezmoi templates define shell runtime behavior through files such as + `dot_zshenv.tmpl` and `dot_zshrc.tmpl`. +- The generated shell files configure runtime PATH, completions, Homebrew + loading, work environment loading, and tool integrations. + +Python support is currently shell-centric: + +- `dot_zshenv.tmpl` adds `~/.pyenv/shims` and `~/.pyenv/bin` to `PATH` when + `~/.pyenv` exists. +- `dot_zshrc.tmpl` lazily initializes pyenv and pyenv-virtualenv when a Python + marker or Python-related command is detected. +- `dot_zshrc.tmpl` caches completions for `poetry`, `pip`, and `pipx`. +- `private_dot_local/bin/executable_pyenv-shell` exists only to make `pyenv + shell` behave when full shell integration is not loaded. + +The optional tools system is package-manager-centered: + +- `installer/internal/config/tools.yaml` lists optional tool names and + descriptions. +- `installer/internal/config/packagemap.yaml` maps abstract tool names to + package-manager-specific names. +- During installation, `installOptionalTools` pre-filters tools by resolving + them for the active package manager. +- Tools without a valid mapping for the active package manager are dropped + before the interactive prompt and before `--install-tools` installation. +- Existing tools such as `sheldon`, `eza`, and `difftastic` are brew-only and do + not appear when the active package manager is not Homebrew. + +The repository currently has no Python project payload to preserve. Searches +found no `pyproject.toml`, `requirements*.txt`, `poetry.lock`, or `uv.lock` in +the repo. Python references are limited to shell integration, documentation, +examples, and package-resolver test fixtures. 
+ +## Chosen Approach + +Adopt uv as the preferred Python tooling entrypoint while keeping the change +inside existing boundaries: + +- Add `uv` to `tools.yaml` as a normal optional tool. +- Add a Homebrew mapping for `uv` in `packagemap.yaml`. +- Let non-Homebrew active package managers continue using existing + unsupported-tool behavior: `uv` is filtered out when no mapping exists. +- Remove pyenv-specific PATH setup, lazy initialization, and local shim support. +- Keep cached completions for `pip`, `poetry`, and `pipx`. +- Rely on Homebrew-provided completions for `uv` and `uvx` in the MVP. +- Document the shell startup process as uv-compatible rather than pyenv-oriented. + +This design treats uv like other optional tools for installation while treating +pyenv as no longer part of the dotfiles' preferred shell runtime model. + +## Decision Summary + +| Decision | Rationale | Consequence | +|----------|-----------|-------------| +| Use uv as the preferred Python tooling entrypoint when available. | uv covers on-demand Python versions, project environments, and Python CLI tools without pyenv shell shims. | Python work should route through uv workflows when project-specific behavior is needed, but shell startup must not require uv. | +| Do not preserve automatic bare `python` switching. | Settled user decision: the user does not rely on this behavior. | pyenv shell integration can be removed without replacing its directory-sensitive shim model. | +| Install uv through optional tools. | The repo has no Python payload requiring a stronger bootstrap dependency. | uv remains user-selected and non-fatal like other optional tools. | +| Support Homebrew first. | The existing package-map flow already supports brew-only tools, and settled user decision accepts brew-only support for the MVP. | uv is hidden for non-Homebrew active package managers until explicit mappings or fallback install support are added. | +| Keep `pip`, `poetry`, and `pipx` completion caching. 
| They are low-cost and useful for existing Python projects or machines where pipx remains installed. | Removing pyenv and adding uv does not remove useful legacy completions. | +| Rely on Homebrew for uv/uvx completions. | The Homebrew uv formula generates uv and uvx completions, and `.zshrc` already adds Homebrew's zsh completion directory to `fpath`. | No explicit uv/uvx completion cache generation is needed for the brew-only MVP. | +| Do not migrate user state. | The installer is not an ongoing state keeper. | Existing pyenv/pipx/Poetry files remain untouched. | + +## Proposed Architecture + +The proposed architecture keeps Python tooling split across the same lifecycle +boundaries as the rest of the dotfiles: + +- Installer lifecycle: `uv` is offered during optional tools installation when + the active package manager can resolve it. +- Template lifecycle: shell templates stop adding pyenv runtime behavior and + retain guarded completion cache generation for pip/Poetry/pipx. uv/uvx + completions come from Homebrew's zsh completion directory when uv is installed + by Homebrew. +- Runtime lifecycle: shells no longer initialize pyenv, add pyenv shims, or + depend on a pyenv workaround shim. Shell startup remains valid without uv; + users invoke uv directly when it is installed and Python project or tool + behavior is needed. + +No new chezmoi data keys, installer state files, or persistent migration records +are introduced. + +## Components And Responsibilities + +| Component / Boundary | Responsibility | Current / New / Modified | Notes | +|----------------------|----------------|--------------------------|-------| +| `installer/internal/config/tools.yaml` | Lists optional tools shown by the installer after package-manager filtering. | Modified | Add `uv` with a user-facing description. | +| `installer/internal/config/packagemap.yaml` | Maps abstract optional tool names to package-manager package names. | Modified | Add `uv` for Homebrew only in the MVP. 
| +| `dot_zshenv.tmpl` | Defines fast environment setup for all Zsh sessions. | Modified | Remove pyenv PATH/shim setup. | +| `dot_zshrc.tmpl` | Defines interactive shell behavior and completion caching. | Modified | Remove lazy pyenv initialization; keep pip/Poetry/pipx completion caching; keep Homebrew completion loading for brew-installed uv/uvx. | +| `private_dot_local/bin/executable_pyenv-shell` | Provides a pyenv-specific workaround command. | Removed | No longer needed once pyenv shell integration is removed. | +| `docs/processes/shell-startup.md` | Documents shell startup behavior. | Modified | Update pyenv-oriented current-state documentation to the uv-compatible runtime model. | +| `docs/domain.md` | Describes optional tools and domain concepts. | Modified | Keep optional-tool documentation accurate after adding uv to the configured optional tools. | +| `docs/architecture.md` | Describes shell runtime dependencies. | Modified | Stop naming pyenv as a shell runtime dependency; describe Python tooling as optional and guarded. | + +## Contracts And Interfaces + +The optional tools contract remains unchanged: + +- Tool definitions remain entries in `tools.yaml`. +- Package resolution remains driven by `packagemap.yaml`. +- Unsupported tools remain filtered out before selection. +- `--install-tools` continues to install all currently resolvable tools. +- Interactive selection continues to show all currently resolvable tools + unselected by default. + +The shell runtime contract changes by removal rather than replacement: + +- The dotfiles no longer guarantee that pyenv shims are placed before other + Python executables. +- The dotfiles no longer guarantee that `~/.pyenv/bin` is placed on `PATH`, so + explicit `pyenv` commands are available only if the user manages pyenv outside + these dotfiles. 
+- The dotfiles no longer initialize pyenv or pyenv-virtualenv in interactive + shells, including for `.python-version`, `.envrc`, or explicit `pyenv*` + command detection. +- The dotfiles no longer provide a `pyenv-shell` workaround command. +- The dotfiles retain guarded completion cache hooks for `pip`, `poetry`, and + `pipx`. +- The dotfiles do not generate cached uv/uvx completions in the MVP. + Brew-installed uv/uvx completions are provided through Homebrew's zsh completion + directory, which is already added to `fpath` before `compinit`. +- Newly installed Homebrew completions may not be visible immediately if + `compinit -C` reuses an existing `~/.zcompdump`; the MVP does not add + installer-level zcompdump invalidation. +- pipx remains completion-supported when installed, but it is no longer the + preferred Python tool manager in the dotfiles' documentation or direction. + +The completion contract is intentionally concrete: + +- `pip` and `poetry` completion generation remains guarded by command existence + checks and stale cache checks, matching the existing completion-cache pattern. +- `pipx` completion generation is guarded by both `pipx` and + `register-python-argcomplete` availability, plus the same stale cache check. +- uv/uvx completion behavior follows the Homebrew formula for the brew-only MVP. +- If future work adds non-Homebrew uv installation, that work must decide whether + to generate uv/uvx completions explicitly or rely on that install method. + +## Data And State + +No new durable data is introduced. + +Existing user-owned state is intentionally left untouched: + +- `~/.pyenv` may continue to exist but is no longer managed or placed on PATH by + the dotfiles. +- Existing pipx tool environments may continue to exist and can still receive + guarded completion refreshes when both the `pipx` command and + `register-python-argcomplete` helper are available. 
+- Existing completion cache files for tools that are no longer installed may + remain; they are residual user state and are not authoritative for the current + dotfiles source. +- Existing Poetry projects and Poetry user configuration are not modified. +- uv-managed Python versions, caches, and tools are created later by uv itself + when the user invokes uv workflows. + +## Control And Data Flow + +During installer optional tools setup: + +1. The installer loads `tools.yaml`. +2. The package resolver tries to resolve each tool for the active package + manager. +3. `uv` resolves on Homebrew systems through the new `packagemap.yaml` entry. +4. `uv` does not resolve when the active package manager is not Homebrew in the + MVP and is filtered out like existing brew-only tools. +5. Interactive users may select `uv` if it is visible; `--install-tools` installs + it automatically only when it is resolvable. + +During shell startup: + +1. `.zshenv` performs base PATH, Homebrew, Rust, and work-environment setup + without pyenv-specific PATH mutation. +2. `.zshrc` initializes completions and interactive shell integrations without + pyenv lazy hooks. +3. Completion cache generation creates or refreshes `pip` and `poetry` + completion files only when the corresponding command is available and the + cache is missing or stale. +4. `pipx` completion cache generation also requires + `register-python-argcomplete` to be available before refreshing `_pipx`. +5. Brew-installed uv/uvx completions are discovered through Homebrew's zsh + completion directory on `fpath`; `.zshrc` does not generate uv/uvx completion + cache files in the MVP. +6. If `~/.zcompdump` is stale, Homebrew-provided uv/uvx completions may not be + discovered until the completion cache is refreshed. The existing interactive + `brew` shell wrapper handles future interactive brew changes, but + installer-driven package installs do not gain a new cache invalidation mechanism in this + RFC. +7. 
Python project behavior is handled by explicit uv commands or by whatever + Python environment the user activates outside the dotfiles. + +## Failure Modes And Recovery + +| Failure Mode | Expected Behavior | Recovery / User Impact | +|--------------|-------------------|------------------------| +| `uv` has no mapping for the active package manager. | It is filtered out before prompt/installation. | User can install uv manually or wait for future standalone fallback support. | +| Homebrew installation of `uv` fails. | Optional tool installation records the failure and continues with other tools. | Main installation remains successful; user can retry or install manually. | +| `uv` is not installed at shell startup. | No uv/uvx completions are available from Homebrew. | Shell starts normally without uv completions. | +| Homebrew installs uv but zsh completion cache is stale. | `_uv` and `_uvx` files may exist in Homebrew's completion directory but not be discovered by `compinit -C`. | Completions appear after the zsh completion cache is refreshed; this RFC does not add installer-level cache invalidation. | +| Existing project expects pyenv shims, `pyenv` on PATH, or pyenv initialization from `.envrc`/`.python-version`. | Dotfiles no longer provide those shims, PATH entries, initialization hooks, or `pyenv-shell` workaround behavior. | User can invoke uv workflows, activate a project environment, or manage pyenv PATH/init outside these dotfiles. | +| Existing pipx-installed command remains on PATH from prior user state. | Dotfiles do not remove it and continue guarded completion refreshes when pipx and `register-python-argcomplete` are available. | Command may still work if user state provides it; pipx remains completion-supported but is not the preferred tool manager. | + +## Security, Privacy, And Permissions + +This RFC does not add a new download script, credential surface, or privilege +model. 
uv installation uses the same package-manager path as other optional +tools in the MVP. Future standalone installer support would require a separate +security review because it would download and execute an external installer +script. + +## Operations And Observability + +The installer observability model remains unchanged: + +- Unsupported optional tools are silently filtered before prompt/installation. +- Installation failures for selected tools are reported in the optional-tools + summary. +- Shell completion generation remains silent and guarded by command existence + checks. + +No new metrics, logs, or state files are introduced. + +## Compatibility And Migration + +This is a forward-looking shell behavior migration, not a user-state migration. + +Compatibility expectations: + +- Existing machines with pyenv installed are not modified on disk. +- Existing shells generated after this change will stop adding pyenv behavior. +- Existing projects or scripts that call `pyenv` directly must manage pyenv PATH + and initialization outside these dotfiles. +- Existing Poetry and pip workflows remain supported at the completion level. +- Existing pipx workflows are not actively broken; pipx remains + completion-supported when installed and when `register-python-argcomplete` is + available, but is no longer the preferred Python tool manager. +- Existing cached completions for removed tools or pyenv-related target files + may remain as residual state; this RFC does not require cleanup. +- Non-Homebrew systems continue to behave like they do for existing brew-only + optional tools: unsupported tools are omitted from selection. + +Rollback is source-level: reintroducing pyenv shell blocks and +`private_dot_local/bin/executable_pyenv-shell` in the chezmoi source, then +applying dotfiles, restores the previous shell integration model assuming user +pyenv state still exists. 
+ +## Alternatives Considered + +| Alternative | Strengths | Why Rejected | +|-------------|-----------|--------------| +| Keep pyenv and add uv alongside it. | Maximum compatibility for bare `python` directory switching. | Preserves shell complexity for behavior the user does not use. | +| Create a dedicated Python setup subsystem. | Could install uv, preinstall Python versions, and define a complete Python baseline. | Over-engineered for a repo with no Python project payload and no installer-time Python dependency. | +| Add standalone uv installer fallback now. | Would support non-Homebrew active package managers even without native package mappings. | More installer design work than the immediate need justifies; most target setups use Homebrew. | +| Add default-selected/recommended optional tools. | Could nudge users toward uv during setup. | Adds UX and installer complexity for a tool that should remain optional. | +| Remove all Python completions. | Simplest shell cleanup. | `pip` remains core, Poetry remains common in existing projects, and installed pipx still benefits from accurate completions. | +| Migrate existing pyenv/pipx/Poetry state. | Could produce a cleaner local machine after migration. | Outside installer responsibility and risks changing user-owned state unexpectedly. | + +## Risks And Tradeoffs + +| Risk / Tradeoff | Impact | Mitigation Or Acceptance | +|-----------------|--------|--------------------------| +| `uv` is initially Homebrew-only in optional tools. | Non-Homebrew active package managers will not offer uv through the installer. | Accepted for MVP; future standalone fallback can be designed if non-Homebrew setups become important. | +| Removing pyenv shell integration may surprise projects that relied on bare `python` switching, `pyenv` on PATH, or pyenv activation from `.envrc`/`.python-version`. 
| Such projects may resolve a different Python or fail to call `pyenv` unless the user invokes uv, activates an environment, or manages pyenv separately. | Accepted because the user does not rely on this behavior; pyenv state is not deleted. | +| Keeping Poetry/pip/pipx completion caching preserves some legacy Python surface. | Shell config remains slightly broader than a pure uv setup. | Accepted because completion generation is guarded and useful for existing projects or machines where pipx remains installed. | +| Relying on Homebrew for uv/uvx completions ties completion availability to the brew formula. | If uv is installed outside Homebrew, uv/uvx completions may not be available from the dotfiles. | Accepted because the MVP installs uv through Homebrew only; non-Homebrew uv installation is future work. | +| Installer-driven Homebrew installs can leave `~/.zcompdump` stale. | uv/uvx completion files may not be discovered immediately after installer-run `brew install uv`. | Accepted for MVP; the RFC narrows success to Homebrew-provided completion files being available on `fpath`, not immediate compinit cache refresh. | +| No user-state cleanup leaves old pyenv/pipx files on disk. | Machines may contain unused legacy state. | Accepted because the installer is not a state keeper; cleanup can be manual or future explicit work. | + +## Success Criteria + +- `uv` is offered and resolvable as an optional tool on Homebrew-backed installs. +- pyenv-specific shell startup behavior is removed from generated Zsh files. +- Shell startup documentation no longer describes pyenv as part of the runtime + model. +- `pip` and `poetry` completions are generated only when the corresponding + command exists and the cache is missing or stale. +- `pipx` completions are generated only when both `pipx` and + `register-python-argcomplete` exist and the cache is missing or stale. 
+- Brew-installed uv/uvx completion files are provided in Homebrew's zsh + completion directory and made discoverable through `fpath`, without + dotfiles-managed uv/uvx completion generation or installer-level zcompdump + invalidation. +- Existing user state for pyenv, pipx, and Poetry is not modified by the + dotfiles or installer. + +## Future Work + +- Non-Homebrew uv installation can be revisited later, either through a + generalized optional-tool alternate-install mechanism or a uv-specific + standalone fallback. That decision is intentionally outside this RFC and must + include a completion-source decision for uv/uvx. +- Installer-level zcompdump invalidation after optional tool installation can be + considered separately if completion freshness after package-manager installs + becomes important. + +## Planning Handoff + +Planning should preserve the MVP boundary. The design is not a request for a +general Python management system, optional-tool recommendation UX, state +migration, or standalone installer support. Any implementation plan should keep +changes within the existing optional-tools mapping model, shell templates, +pyenv-specific source files, and documentation that currently describes shell +startup behavior. + +If future planning proposes non-Homebrew uv installation, it should be treated +as a separate design extension because it changes installer installation methods +and security considerations. + +## Source References + +| Source | What It Confirms | +|--------|------------------| +| `docs/domain.md` | Optional tools are daily-use CLI tools, are not persisted in chezmoi data, and may have platform-dependent availability. | +| `docs/architecture.md` | The project separates installer bootstrap, chezmoi templates, and shell runtime behavior. 
| +| `docs/processes/tools-installation.md` | Optional tools are pre-filtered through package resolution, selected interactively or via `--install-tools`, installed through the active package manager, and non-fatal. | +| `docs/processes/shell-startup.md` | Current documented shell startup includes pyenv setup, lazy pyenv initialization, and completion caching for Poetry, pip, and pipx. | +| `dot_zshenv.tmpl` | Current all-shell startup adds pyenv shims and pyenv binary path when `~/.pyenv` exists. | +| `dot_zshrc.tmpl` | Current interactive startup lazily initializes pyenv and caches Poetry, pip, and pipx completions. | +| `private_dot_local/bin/executable_pyenv-shell` | Current repo includes a pyenv-specific workaround shim. | +| `installer/internal/config/tools.yaml` | Current optional tool list does not include uv. | +| `installer/internal/config/packagemap.yaml` | Current package map has existing brew-only tools and no uv mapping. | +| `installer/cmd/install.go` | Current optional-tools flow filters resolvable tools before prompt/install and installs through `ToolsInstaller`. | +| `installer/lib/toolsinstaller/installer.go` | Current `ToolsInstaller` resolves each tool and installs through the package manager, continuing after failures. | +| uv documentation: `https://docs.astral.sh/uv/concepts/python-versions/` | uv supports managed Python versions, `.python-version`, automatic downloads, and system Python discovery. | +| uv documentation: `https://docs.astral.sh/uv/concepts/tools/` | uv provides `uv tool install` and `uvx` for isolated Python CLI tools. | +| uv documentation: `https://docs.astral.sh/uv/getting-started/installation/` | uv is available through Homebrew and other install paths, including a standalone installer. 
| +| Homebrew uv formula fetched via `gh api repos/Homebrew/homebrew-core/contents/Formula/u/uv.rb` | Homebrew generates completions from both `uv generate-shell-completion` and `uvx --generate-shell-completion` during formula installation. | +| `dot_zshrc.tmpl` | Homebrew's zsh `site-functions` directory is added to `fpath` before `compinit`, allowing brew-installed completions to be discovered. | +| Settled user decision | Directory-sensitive bare `python` switching is not important in day-to-day use. | +| Settled user decision | A full Python setup subsystem, preinstalled Python versions, and user-state migration are out of scope. | +| Settled user decision | `pip`, `poetry`, and guarded `pipx` completions should remain. | +| Settled user decision | `uv` should not be preselected or specially recommended in the optional-tools prompt. | +| Settled user decision | Brew-only optional-tool support is acceptable for the MVP because most target setups use Homebrew. | diff --git a/docs/rfcs/reviews/RFC-uv.rfc-architect-reviewer.md b/docs/rfcs/reviews/RFC-uv.rfc-architect-reviewer.md new file mode 100644 index 0000000..e369221 --- /dev/null +++ b/docs/rfcs/reviews/RFC-uv.rfc-architect-reviewer.md @@ -0,0 +1,22 @@ +# RFC Architecture Review: RFC-0001 Switch To uv For Python Tooling + +## Verdict + +PASS + +## Critical Findings + +None. + +## Concerns + +None. + +## Observations + +- The main architectural boundaries remain sound: uv is a normal optional tool; no Python setup subsystem, new chezmoi data contract, standalone installer fallback, or user-state migration is introduced. +- R4 resolves the prior blocking finding about Homebrew-owned completions and `compinit -C` cache semantics. The RFC now explicitly states that stale `~/.zcompdump` may delay discovery of Homebrew-provided `_uv` and `_uvx`, and that installer-level cache invalidation is out of scope for this MVP. 
+- R4's completion ownership split is architecturally coherent: pip, Poetry, and pipx remain dotfiles-managed guarded cache targets, while uv/uvx completions are owned by Homebrew for the brew-only MVP. +- The pipx completion contract now covers both required executables: `pipx` and `register-python-argcomplete`. +- Current-state claims checked against source remain broadly consistent: pyenv PATH setup exists in `dot_zshenv.tmpl`, lazy pyenv initialization and pip/Poetry/pipx completion caching exist in `dot_zshrc.tmpl`, `pyenv-shell` exists as a local shim, optional tools are pre-filtered through package resolution, and `uv` is not currently in `tools.yaml` or `packagemap.yaml`. +- The active package-manager wording concern from the prior review is resolved; R4 consistently describes unsupported uv availability as non-Homebrew active package-manager behavior rather than OS-level Linux behavior. diff --git a/docs/rfcs/reviews/RFC-uv.rfc-clarity-reviewer.md b/docs/rfcs/reviews/RFC-uv.rfc-clarity-reviewer.md new file mode 100644 index 0000000..1c3508b --- /dev/null +++ b/docs/rfcs/reviews/RFC-uv.rfc-clarity-reviewer.md @@ -0,0 +1,35 @@ +# RFC Clarity Review: RFC-0001 Switch To uv For Python Tooling + +## Verdict + +PASS + +## Critical Findings + +Empty. + +## Concerns + +Empty. + +## Observations + +- R4 keeps the MVP boundary explicit: uv is a normal optional tool, Homebrew-only + availability is accepted for this RFC, standalone fallback is future work, no + user-state migration is included, and installer-level zcompdump invalidation is + out of scope. +- R4 names `docs/domain.md`, `docs/architecture.md`, and + `docs/processes/shell-startup.md` as documentation updates needed to keep the + optional-tool and shell-startup descriptions consistent with the chosen model. 
+- The pipx role is clear: pipx is retained only as a guarded completion target + when both `pipx` and `register-python-argcomplete` are installed, while uv is + the preferred Python tooling entrypoint and pipx is not the preferred Python + tool manager. +- The completion-source distinction is clear: `pip`, `poetry`, and `pipx` keep + guarded dotfiles-managed cache generation, while uv/uvx completions are not + generated by the dotfiles and instead come from Homebrew's zsh + `site-functions` directory for brew-installed uv. +- R4 explicitly scopes the stale `~/.zcompdump` behavior: Homebrew may install + `_uv` and `_uvx`, but `compinit -C` can delay discovery until the completion + cache refreshes; this is accepted rather than left as an unresolved follow-up + inside the RFC. diff --git a/docs/rfcs/reviews/RFC-uv.rfc-risk-reviewer.md b/docs/rfcs/reviews/RFC-uv.rfc-risk-reviewer.md new file mode 100644 index 0000000..b42e46c --- /dev/null +++ b/docs/rfcs/reviews/RFC-uv.rfc-risk-reviewer.md @@ -0,0 +1,23 @@ +# RFC Risk Review: RFC-0001 Switch To uv For Python Tooling + +## Verdict + +PASS + +## Critical Findings + +Empty. + +## Concerns + +Empty. + +## Observations + +- R4 resolves the prior stale `~/.zcompdump` concern at the design level. The RFC now explicitly states that `compinit -C` may reuse an existing completion dump, that installer-driven Homebrew installs do not gain a new invalidation path, and that immediate uv/uvx completion discovery is outside the MVP success boundary. +- R4 resolves the prior pipx completion guard concern. The completion contract, control flow, data/state section, compatibility section, and success criteria now require both `pipx` and `register-python-argcomplete` before refreshing `_pipx`. +- The uv/uvx completion-generation scope remains appropriately narrow: rely on Homebrew's uv formula and existing Homebrew `fpath` loading rather than adding dotfiles-managed uv/uvx cache files. 
+- The pyenv removal boundary remains clear: no shim PATH setup, no `~/.pyenv/bin` PATH setup, no lazy initialization for `.python-version`/`.envrc`, and no `pyenv-shell` workaround. +- The no-migration and residual-state behavior is explicit enough for planning: existing pyenv, pipx, Poetry, uv-managed state, and old completion-cache files may remain and are not treated as authoritative current dotfile state. +- Brew-only, optional uv availability is acknowledged as an accepted MVP tradeoff; the future non-Homebrew uv path is correctly treated as a separate design extension with its own completion-source and security decisions. +- Removing pyenv shell integration is a real compatibility break for projects that depended on bare `python` switching, but the RFC states this as an accepted user decision and preserves rollback at the source-template level. From 4cdf5e9369a39ae23ad29ac5a6d073e09245862b Mon Sep 17 00:00:00 2001 From: Timor Gruber Date: Sat, 9 May 2026 00:53:19 +0300 Subject: [PATCH 02/13] add project-local OpenCode worker agents Provide reusable project-local workers for Go, Zsh, docs, and test authoring so feature plans can bind execution to explicit model tiers and required skills instead of relying on the current session. 
--- .opencode/agents/cheapest-go-worker.md | 28 ++++++++++++++++++ .opencode/agents/cheapest-zsh-worker.md | 24 +++++++++++++++ .opencode/agents/mid-tier-go-worker.md | 28 ++++++++++++++++++ .opencode/agents/mid-tier-zsh-worker.md | 24 +++++++++++++++ .opencode/agents/most-capable-docs-worker.md | 27 +++++++++++++++++ .opencode/agents/most-capable-go-worker.md | 28 ++++++++++++++++++ .../agents/most-capable-test-author-worker.md | 29 +++++++++++++++++++ .opencode/agents/most-capable-zsh-worker.md | 24 +++++++++++++++ 8 files changed, 212 insertions(+) create mode 100644 .opencode/agents/cheapest-go-worker.md create mode 100644 .opencode/agents/cheapest-zsh-worker.md create mode 100644 .opencode/agents/mid-tier-go-worker.md create mode 100644 .opencode/agents/mid-tier-zsh-worker.md create mode 100644 .opencode/agents/most-capable-docs-worker.md create mode 100644 .opencode/agents/most-capable-go-worker.md create mode 100644 .opencode/agents/most-capable-test-author-worker.md create mode 100644 .opencode/agents/most-capable-zsh-worker.md diff --git a/.opencode/agents/cheapest-go-worker.md b/.opencode/agents/cheapest-go-worker.md new file mode 100644 index 0000000..6ca8993 --- /dev/null +++ b/.opencode/agents/cheapest-go-worker.md @@ -0,0 +1,28 @@ +--- +name: cheapest-go-worker +description: "Use this project-local worker for Go implementation tasks assigned to the cheapest model tier. It is intended for straightforward Go/config changes with clear acceptance criteria." +mode: subagent +model: openai/gpt-5.5 +reasoningEffort: low +permission: + edit: allow + bash: allow + webfetch: deny + task: + "*": deny + skill: + managing-chezmoi: allow + writing-go-code: allow + writing-go-tests: allow + testing-go-code: allow + linting-go-code: allow + building-go-binaries: allow +--- + +You are a project-local Go implementation worker running at the cheapest tier with low reasoning effort. 
+ +Load these skills immediately before working: `managing-chezmoi`, `writing-go-code`, `writing-go-tests`, `testing-go-code`, `linting-go-code`, and `building-go-binaries`. + +Your task prompt and the assigned sub-plan are the source of truth. Read the relevant files, make only the requested changes, verify through the loaded testing, linting, and building skills, and report the changes and verification results. + +Do not run raw Go commands directly when a loaded skill provides the project command. Do not commit changes unless explicitly instructed. diff --git a/.opencode/agents/cheapest-zsh-worker.md b/.opencode/agents/cheapest-zsh-worker.md new file mode 100644 index 0000000..9da489f --- /dev/null +++ b/.opencode/agents/cheapest-zsh-worker.md @@ -0,0 +1,24 @@ +--- +name: cheapest-zsh-worker +description: "Use this project-local worker for Zsh or chezmoi shell-template tasks assigned to the cheapest model tier. It is intended for small, clear shell edits." +mode: subagent +model: openai/gpt-5.5 +reasoningEffort: low +permission: + edit: allow + bash: allow + webfetch: deny + task: + "*": deny + skill: + managing-chezmoi: allow + configuring-zsh: allow +--- + +You are a project-local Zsh implementation worker running at the cheapest tier with low reasoning effort. + +Load these skills immediately before working: `managing-chezmoi` and `configuring-zsh`. + +Your task prompt and the assigned sub-plan are the source of truth. Read the relevant shell templates and docs, make only the requested changes, verify through the loaded skills, and report the changes and verification results. + +Preserve fast shell startup, guarded command checks, and existing template conventions. Do not commit changes unless explicitly instructed. 
diff --git a/.opencode/agents/mid-tier-go-worker.md b/.opencode/agents/mid-tier-go-worker.md new file mode 100644 index 0000000..67626e1 --- /dev/null +++ b/.opencode/agents/mid-tier-go-worker.md @@ -0,0 +1,28 @@ +--- +name: mid-tier-go-worker +description: "Use this project-local worker for Go implementation tasks assigned to the mid-tier model. It is intended for Go changes with moderate integration, edge cases, or coordination risk." +mode: subagent +model: openai/gpt-5.5 +reasoningEffort: medium +permission: + edit: allow + bash: allow + webfetch: deny + task: + "*": deny + skill: + managing-chezmoi: allow + writing-go-code: allow + writing-go-tests: allow + testing-go-code: allow + linting-go-code: allow + building-go-binaries: allow +--- + +You are a project-local Go implementation worker running at the mid tier with medium reasoning effort. + +Load these skills immediately before working: `managing-chezmoi`, `writing-go-code`, `writing-go-tests`, `testing-go-code`, `linting-go-code`, and `building-go-binaries`. + +Your task prompt and the assigned sub-plan are the source of truth. Read the relevant files, make only the requested changes, verify through the loaded testing, linting, and building skills, and report the changes and verification results. + +Do not run raw Go commands directly when a loaded skill provides the project command. Do not commit changes unless explicitly instructed. diff --git a/.opencode/agents/mid-tier-zsh-worker.md b/.opencode/agents/mid-tier-zsh-worker.md new file mode 100644 index 0000000..72a9fc8 --- /dev/null +++ b/.opencode/agents/mid-tier-zsh-worker.md @@ -0,0 +1,24 @@ +--- +name: mid-tier-zsh-worker +description: "Use this project-local worker for Zsh or chezmoi shell-template tasks assigned to the mid-tier model. It is intended for shell startup, PATH, completion, and hook changes where regressions are costly."
+mode: subagent +model: openai/gpt-5.5 +reasoningEffort: medium +permission: + edit: allow + bash: allow + webfetch: deny + task: + "*": deny + skill: + managing-chezmoi: allow + configuring-zsh: allow +--- + +You are a project-local Zsh implementation worker running at the mid tier with medium reasoning effort. + +Load these skills immediately before working: `managing-chezmoi` and `configuring-zsh`. + +Your task prompt and the assigned sub-plan are the source of truth. Read the relevant shell templates and docs, make only the requested changes, verify through the loaded skills, and report the changes and verification results. + +Preserve fast shell startup, guarded command checks, and existing template conventions. Do not commit changes unless explicitly instructed. diff --git a/.opencode/agents/most-capable-docs-worker.md b/.opencode/agents/most-capable-docs-worker.md new file mode 100644 index 0000000..9c7efe1 --- /dev/null +++ b/.opencode/agents/most-capable-docs-worker.md @@ -0,0 +1,27 @@ +--- +name: most-capable-docs-worker +description: "Use this project-local worker for documentation tasks assigned to the most capable model tier. It is intended for domain, architecture, and process documentation updates that require synthesizing implementation context." +mode: subagent +model: openai/gpt-5.5 +reasoningEffort: xhigh +permission: + edit: allow + bash: deny + webfetch: deny + task: + "*": deny + skill: + managing-chezmoi: allow + documenting-domain: allow + documenting-architecture: allow + documenting-business-processes: allow + documenting-components: allow +--- + +You are a project-local documentation worker running at the most capable tier with extra-high reasoning effort. + +Load `managing-chezmoi` immediately. Also load the relevant documentation skills for the assigned task when available: `documenting-domain`, `documenting-architecture`, `documenting-business-processes`, and `documenting-components`.
+ +Your task prompt and the assigned sub-plan are the source of truth. Update only the requested documentation, keep claims grounded in the implementation and referenced plans, and preserve project terminology and document structure. + +Do not invent implemented behavior. Do not commit changes unless explicitly instructed. diff --git a/.opencode/agents/most-capable-go-worker.md b/.opencode/agents/most-capable-go-worker.md new file mode 100644 index 0000000..86b4107 --- /dev/null +++ b/.opencode/agents/most-capable-go-worker.md @@ -0,0 +1,28 @@ +--- +name: most-capable-go-worker +description: "Use this project-local worker for Go implementation tasks assigned to the most capable model tier. It is intended for complex Go work requiring broad reasoning or high correctness confidence." +mode: subagent +model: openai/gpt-5.5 +reasoningEffort: xhigh +permission: + edit: allow + bash: allow + webfetch: deny + task: + "*": deny + skill: + managing-chezmoi: allow + writing-go-code: allow + writing-go-tests: allow + testing-go-code: allow + linting-go-code: allow + building-go-binaries: allow +--- + +You are a project-local Go implementation worker running at the most capable tier with extra-high reasoning effort. + +Load these skills immediately before working: `managing-chezmoi`, `writing-go-code`, `writing-go-tests`, `testing-go-code`, `linting-go-code`, and `building-go-binaries`. + +Your task prompt and the assigned sub-plan are the source of truth. Read the relevant files, make only the requested changes, verify through the loaded testing, linting, and building skills, and report the changes and verification results. + +Do not run raw Go commands directly when a loaded skill provides the project command. Do not commit changes unless explicitly instructed. 
diff --git a/.opencode/agents/most-capable-test-author-worker.md b/.opencode/agents/most-capable-test-author-worker.md new file mode 100644 index 0000000..1d88cd0 --- /dev/null +++ b/.opencode/agents/most-capable-test-author-worker.md @@ -0,0 +1,29 @@ +--- +name: most-capable-test-author-worker +description: "Use this project-local worker to write tests from acceptance criteria before implementation. It always runs on the most capable model tier because test intent and failure quality are critical." +mode: subagent +model: openai/gpt-5.5 +reasoningEffort: xhigh +permission: + edit: allow + bash: allow + webfetch: deny + task: + "*": deny + skill: + managing-chezmoi: allow + test-driven-development: allow + writing-go-code: allow + writing-go-tests: allow + testing-go-code: allow + linting-go-code: allow + building-go-binaries: allow +--- + +You are a project-local test author worker running at the most capable tier with extra-high reasoning effort. + +Load these skills immediately before working: `managing-chezmoi`, `test-driven-development`, `writing-go-code`, `writing-go-tests`, and `testing-go-code`. Load `linting-go-code` and `building-go-binaries` when verification requires them. + +Write tests from the assigned sub-plan acceptance criteria before implementation. Confirm the tests fail for the intended reason when feasible, then report the test files changed and failure output. Do not implement production code unless the task explicitly asks for it. + +Do not run raw Go commands directly when a loaded skill provides the project command. Do not commit changes unless explicitly instructed. 
diff --git a/.opencode/agents/most-capable-zsh-worker.md b/.opencode/agents/most-capable-zsh-worker.md new file mode 100644 index 0000000..7b7f795 --- /dev/null +++ b/.opencode/agents/most-capable-zsh-worker.md @@ -0,0 +1,24 @@ +--- +name: most-capable-zsh-worker +description: "Use this project-local worker for Zsh or chezmoi shell-template tasks assigned to the most capable model tier. It is intended for complex shell startup work requiring high correctness confidence." +mode: subagent +model: openai/gpt-5.5 +reasoningEffort: xhigh +permission: + edit: allow + bash: allow + webfetch: deny + task: + "*": deny + skill: + managing-chezmoi: allow + configuring-zsh: allow +--- + +You are a project-local Zsh implementation worker running at the most capable tier with extra-high reasoning effort. + +Load these skills immediately before working: `managing-chezmoi` and `configuring-zsh`. + +Your task prompt and the assigned sub-plan are the source of truth. Read the relevant shell templates and docs, make only the requested changes, verify through the loaded skills, and report the changes and verification results. + +Preserve fast shell startup, guarded command checks, and existing template conventions. Do not commit changes unless explicitly instructed. From 8841c9f12438c7dedd3095d1182972fa789cff95 Mon Sep 17 00:00:00 2001 From: Timor Gruber Date: Sat, 9 May 2026 14:29:40 +0300 Subject: [PATCH 03/13] remove pyenv shell bindings, add uv as optional tool The goal is to switch from pyenv to uv for Python management, as uv is becoming the industry standard and can safely replace most of pyenv's capabilities, as well as those of other tools.
--- docs/architecture.md | 5 +- docs/context/uv-python-tooling-anchor.md | 13 ++-- docs/domain.md | 6 +- docs/processes/shell-startup.md | 53 +++++++------- dot_zshenv.tmpl | 17 ----- dot_zshrc.tmpl | 69 +------------------ installer/internal/config/packagemap.yaml | 3 + installer/internal/config/tools.yaml | 2 + .../lib/packageresolver/integration_test.go | 41 +++++++++++ private_dot_local/bin/executable_pyenv-shell | 18 ----- 10 files changed, 87 insertions(+), 140 deletions(-) delete mode 100644 private_dot_local/bin/executable_pyenv-shell diff --git a/docs/architecture.md b/docs/architecture.md index f30220f..a0e5145 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -27,9 +27,9 @@ The project has three major parts that interact through a clear data contract: a ### Shell Configuration (`dot_zshenv.tmpl`, `dot_zshrc.tmpl`, `private_dot_work/`) -- **Responsibility**: Configure the runtime shell environment — PATH, tool integrations, plugins, [work environment][domain-work-env] loading +- **Responsibility**: Configure the runtime shell environment — PATH, guarded tool integrations, plugins, [work environment][domain-work-env] loading - **Boundaries**: These are chezmoi templates that produce shell files. At runtime, the generated files are plain shell scripts with no chezmoi dependency. -- **Dependencies**: Generated by chezmoi from templates. At runtime, depends on installed tools (Homebrew, pyenv, sheldon, fzf, etc.) +- **Dependencies**: Generated by chezmoi from templates. At runtime, progressively uses installed tools (Homebrew, sheldon, fzf, optional Python tooling, etc.) when available and skips guarded integrations when they are missing. ### Embedded Configuration (`installer/internal/config/`) @@ -63,6 +63,7 @@ Generated shell files execute in Zsh's standard sourcing order: `.zshenv` (all s | Chezmoi as dotfiles manager | Not bare git, stow, or yadm | Templates with conditional logic, mature tooling, good cross-platform support. 
The two-tier work environment model relies on chezmoi's template system. | | Sheldon for Zsh plugins | Not oh-my-zsh framework | oh-my-zsh is used only for vendored function/plugin snippets (loaded as local files), not as a runtime framework. Sheldon is faster and more composable. | | Deferred Homebrew loading | Split across `.zshenv` / `.zshrc` | Homebrew's `shellenv` eval is expensive (~50ms). Deferring it on macOS keeps non-interactive shells fast while still having brew available for interactive use. | +| Python tooling as optional runtime surface | Offer `uv` through optional tools and keep shell startup guarded | The dotfiles do not manage Python versions or initialize Python version-manager shims. Existing `pip`, `poetry`, and `pipx` completions remain guarded, while brew-installed uv/uvx completions are discovered through Homebrew's completion directory. | | Fresh clone on every apply | Delete and re-clone `~/.local/share/chezmoi` | Avoids merge conflicts and stale state. The installer always applies the latest from the configured branch. | ## Diagram diff --git a/docs/context/uv-python-tooling-anchor.md b/docs/context/uv-python-tooling-anchor.md index a5659bc..c40e939 100644 --- a/docs/context/uv-python-tooling-anchor.md +++ b/docs/context/uv-python-tooling-anchor.md @@ -10,8 +10,8 @@ reducing shell complexity while preserving portable machine setup behavior. | Settled Direction | What It Means | |-------------------|---------------| -| RFC accepted. | `docs/rfcs/RFC-0001-switch-to-uv-python-tooling.md` R4 is the accepted design baseline. | -| Current repo treats Python mostly as shell runtime state. | Existing integration is in `dot_zshenv.tmpl`, `dot_zshrc.tmpl`, and `docs/processes/shell-startup.md`; the installer does not currently install Python tooling. | +| RFC accepted. | `docs/rfcs/RFC-switch-to-uv-python-tooling.md` R4 is the accepted design baseline. | +| Implementation complete. 
| `uv` is configured as a brew-only optional tool, pyenv shell runtime behavior has been removed from the dotfiles source, and docs describe the uv-compatible optional Python tooling model. | | Directory-sensitive bare `python` selection is not required. | This removes the strongest reason to keep pyenv shims and shell integration. | | `uv` should be a normal optional installer tool. | The installer should offer `uv`, but not preselect or special-case it; adding recommendation tiers is not worth the complexity. | | Current repo has no Python project payload. | Searches found no `pyproject.toml`, `requirements*.txt`, `poetry.lock`, or `uv.lock`; Python references are shell integration, docs/examples, and packageresolver test fixtures. | @@ -40,11 +40,12 @@ reducing shell complexity while preserving portable machine setup behavior. - [x] R3 review found completion-cache ambiguity; R4 clarifies stale zcompdump behavior and requires a safer pipx argcomplete guard. - [x] RFC R4 passed architecture, risk, and clarity review with no concerns. - [x] User accepted RFC R4. +- [x] Implementation plan `plans/features/switch-to-uv-python-tooling/` executed completely; final installer verification passed with `rtk go test ./...` from `installer/`. ## Constraints - The repository is a chezmoi source directory; shell behavior changes should be made in source templates, not generated files. -- Shell startup performance matters; current pyenv integration is lazy because eager `pyenv init` was too costly or intrusive. +- Shell startup performance matters; avoid reintroducing eager Python tool initialization or mandatory uv startup work. - The installer is the only writer of chezmoi data, but Python tooling is currently not part of the installer data contract. - Existing design favors progressive enhancement: missing tools should not break shell startup. @@ -176,10 +177,8 @@ No active open questions for the accepted RFC. 
- `docs/processes/shell-startup.md` - `installer/internal/config/tools.yaml` - `installer/internal/config/packagemap.yaml` -- `docs/rfcs/RFC-0001-switch-to-uv-python-tooling.md` -- `docs/rfcs/reviews/RFC-0001.rfc-architect-reviewer.md` -- `docs/rfcs/reviews/RFC-0001.rfc-risk-reviewer.md` -- `docs/rfcs/reviews/RFC-0001.rfc-clarity-reviewer.md` +- `docs/rfcs/RFC-switch-to-uv-python-tooling.md` +- `plans/features/switch-to-uv-python-tooling/progress.md` - https://docs.astral.sh/uv/concepts/python-versions/ - https://docs.astral.sh/uv/concepts/tools/ - https://docs.astral.sh/uv/getting-started/installation/ diff --git a/docs/domain.md b/docs/domain.md index a57862f..ada5d57 100644 --- a/docs/domain.md +++ b/docs/domain.md @@ -88,12 +88,14 @@ See the [package resolution process][pkg-resolution] for the resolution flow fro ### Optional Tools -Daily-use CLI tools (fzf, bat, eza, ripgrep, fd, difftastic, sheldon) that the installer can install at the user's request. Defined in [`tools.yaml`][tools-yaml], separate from prerequisites. +Daily-use CLI tools (fzf, bat, eza, ripgrep, fd, difftastic, sheldon, uv) that the installer can install at the user's request. Defined in [`tools.yaml`][tools-yaml], separate from prerequisites. - **Tool definition**: An entry in `tools.yaml` with a `name` ([abstract package key](#package-resolution)) and a human-readable `description`. - **Not required**: Unlike prerequisites, optional tools are not needed for correct dotfiles setup. They enhance the shell experience but the system works without them. - **Not persisted**: Tool selections are not saved to chezmoi data — tools are not part of the data contract. -- **Platform-dependent availability**: Not all tools have package mappings for every manager. Some (e.g., `sheldon`, `eza`, `difftastic`) are brew-only. +- **Platform-dependent availability**: Not all tools have package mappings for every manager. Some (e.g., `sheldon`, `eza`, `difftastic`, `uv`) are brew-only. 
+ +`uv` is an optional Python tooling entrypoint in this model. It is available through the optional-tools flow only where package resolution supports it; it is not required for dotfiles setup, is not written to chezmoi data, and does not imply installer-managed Python versions or migration of existing Python tool state. See the [optional tools installation process][tools-install] for selection, filtering, and installation details. diff --git a/docs/processes/shell-startup.md b/docs/processes/shell-startup.md index ce30f7b..46430e6 100644 --- a/docs/processes/shell-startup.md +++ b/docs/processes/shell-startup.md @@ -2,7 +2,7 @@ ## Overview -Describes how the Zsh shell environment initializes, including Homebrew loading, PATH setup, work environment activation, and plugin loading. The startup is split across `.zshenv` (all shells) and `.zshrc` (interactive shells) with an emphasis on keeping non-interactive shell startup fast. +Describes how the Zsh shell environment initializes, including Homebrew loading, PATH setup, guarded completion setup, work environment activation, and plugin loading. The startup is split across `.zshenv` (all shells) and `.zshrc` (interactive shells) with an emphasis on keeping non-interactive shell startup fast. 
## Trigger @@ -31,10 +31,7 @@ flowchart TD D1 --> D4["BREW_LOADED=true"] D2 --> D4 D3 --> D4 - D4 --> E{~/.pyenv exists?} - E -- Yes --> E1[Add pyenv shims to PATH] - E -- No --> F - E1 --> F{Linux + mold?} + D4 --> F{Linux + mold?} F -- Yes --> F1[Set RUSTFLAGS for mold] F -- No --> G F1 --> G{work_env?} @@ -51,8 +48,7 @@ flowchart TD L -- Yes --> L1[load_brew_env now] L -- No --> M L1 --> M[Aliases & tool PATHs] - M --> N[Lazy pyenv init] - N --> O[Cache completions] + M --> O[Cache guarded completions] O --> P{work_env?} P -- Yes --> P1[Source work RC extension] P -- No --> Q @@ -86,27 +82,25 @@ flowchart TD - **Linux**: Call `load_brew_env` immediately — PATH consistency is more important than startup speed - **Devbox**: Add brew directories to PATH directly — no `eval` needed - Set `BREW_LOADED=true` to prevent double-loading -4. **Set up pyenv** — If `~/.pyenv` exists, add shims and bin directories to PATH (fast, no eval) -5. **Set up Rust linker** — On Linux with mold available, configure `RUSTFLAGS` to use mold -6. **Load work environment** — If `personal.work_env` is true, see the [work environment loading process][work-env-loading] +4. **Set up Rust linker** — On Linux with mold available, configure `RUSTFLAGS` to use mold +5. **Load work environment** — If `personal.work_env` is true, see the [work environment loading process][work-env-loading] ### `.zshrc` Phase (Interactive Shells Only) 1. **Powerlevel10k instant prompt** — Load cached prompt for immediate visual feedback 2. **Set editor and locale** — `VISUAL`, `EDITOR`, `LANG`, `LC_ALL` 3. **Configure history** — History file, size, append mode -4. **Set up completions** — Configure fpath, load Homebrew completions, run `compinit -C` (cached) +4. **Set up completions** — Configure fpath, add Homebrew's `share/zsh/site-functions` directory before `compinit`, then run `compinit -C` (cached). 5. **Complete deferred brew loading** — If `DEFER_BREW_LOAD` is `true`, call `load_brew_env` now 6. 
**Set up aliases** — Git, neovim, GPG unlock -7. **Configure pyenv** — Lazy initialization: full pyenv shell integration is loaded only when a Python-related command is detected or `.python-version`/`.envrc` is found in the directory tree -8. **Add tool PATHs** — Cargo, Go, Ruby gems, clang-format, bun (conditional on tool availability) -9. **Cache completions** — Generate completion files for cargo, poetry, pip, pipx only if missing or older than 7 days -10. **Source work RC extension** — If `personal.work_env` is true, source `WORK_ZSH_RC_EXTENSION` (see [work environment loading][work-env-loading]) -11. **Load oh-my-zsh functions and plugins** — Vendored git functions, key-bindings, git, dotenv plugins -12. **Load sheldon plugins** — `eval "$(sheldon source)"` loads the plugin set defined in [`plugins.toml`][sheldon-plugins] -13. **Load Powerlevel10k theme** — Source `~/.p10k.zsh` -14. **Configure fzf and fzf-tab** — Fuzzy finder integration with completion system -15. **VS Code shell integration** — If running inside VS Code terminal, load VS Code's shell integration script +7. **Add tool PATHs** — Cargo, Go, Ruby gems, clang-format, bun (conditional on tool availability) +8. **Cache completions** — Generate completion files for cargo, poetry, pip, pipx only if missing or older than 7 days. `pipx` completion generation requires both `pipx` and `register-python-argcomplete` to exist before invoking argcomplete. +9. **Source work RC extension** — If `personal.work_env` is true, source `WORK_ZSH_RC_EXTENSION` (see [work environment loading][work-env-loading]) +10. **Load oh-my-zsh functions and plugins** — Vendored git functions, key-bindings, git, dotenv plugins +11. **Load sheldon plugins** — `eval "$(sheldon source)"` loads the plugin set defined in [`plugins.toml`][sheldon-plugins] +12. **Load Powerlevel10k theme** — Source `~/.p10k.zsh` +13. **Configure fzf and fzf-tab** — Fuzzy finder integration with completion system +14. 
**VS Code shell integration** — If running inside VS Code terminal, load VS Code's shell integration script ### Failure Scenarios @@ -124,17 +118,24 @@ flowchart TD - **Handling**: The `[[ -r ... ]]` guard skips sourcing. Shell starts without work config. - **User impact**: Work-specific environment variables and paths won't be set. Run the installer with `--work-env` to regenerate. -#### pyenv not installed +#### Optional Python tooling not installed -- **Trigger**: `~/.pyenv` directory doesn't exist -- **At step**: `.zshenv` step 4, `.zshrc` step 7 -- **Handling**: Conditional checks skip all pyenv setup -- **User impact**: None — pyenv features simply aren't available +- **Trigger**: `uv`, `pip`, `poetry`, `pipx`, or `register-python-argcomplete` is not installed +- **At step**: `.zshrc` steps 4 and 8 +- **Handling**: Guarded completion setup skips missing tools. Brew-installed uv/uvx completions are available only when Homebrew provides completion files on `fpath` and zsh's completion cache recognizes them. +- **User impact**: Shell startup succeeds. Missing tools simply do not provide completions or commands until the user installs or activates them. 
+ +#### Stale zsh completion cache after Homebrew uv installation + +- **Trigger**: `uv` is installed by Homebrew outside the interactive `brew` wrapper path, but `compinit -C` reuses an older `~/.zcompdump` +- **At step**: `.zshrc` step 4 +- **Handling**: The Homebrew completion directory remains on `fpath`, but zsh may not immediately discover new `_uv` or `_uvx` completion files until the cache is refreshed +- **User impact**: uv commands work if installed, but uv/uvx completions may appear only after normal completion cache refresh or manual cache invalidation ## State Changes - **Environment variables**: PATH, BREW_HOME, BREW_LOADED, DEFER_BREW_LOAD, WORK_ENV_LOADED, and tool-specific vars -- **Shell functions**: `load_brew_env`, `brew` wrapper (invalidates completion cache on install/uninstall), pyenv lazy loaders +- **Shell functions**: `load_brew_env`, `brew` wrapper (invalidates completion cache on install/uninstall) - **Completion system**: `compinit` initialized with cache, fpath populated ## Dependencies diff --git a/dot_zshenv.tmpl b/dot_zshenv.tmpl index 8b7e169..9ce6f0e 100644 --- a/dot_zshenv.tmpl +++ b/dot_zshenv.tmpl @@ -35,23 +35,6 @@ if [[ ! -v BREW_LOADED || "$BREW_LOADED" == "false" ]]; then BREW_LOADED=true fi -# pyenv: make shims available in all shells (fast; no eval) -if [[ -d "$HOME/.pyenv" ]]; then - export PYENV_ROOT="$HOME/.pyenv" - - # Put pyenv shims first so `python`/`pip` resolve via pyenv when configured. - case ":$PATH:" in - *":$PYENV_ROOT/shims:"*) ;; - *) export PATH="$PYENV_ROOT/shims:$PATH" ;; - esac - - # Ensure the pyenv binary is on PATH as well. 
- case ":$PATH:" in - *":$PYENV_ROOT/bin:"*) ;; - *) export PATH="$PYENV_ROOT/bin:$PATH" ;; - esac -fi - {{ if (eq .chezmoi.os "linux") -}} if [[ -x "$HOME/.cargo/bin/cargo" ]] && command -v mold &>/dev/null; then # Always prefer to use mold as the linker on Linux diff --git a/dot_zshrc.tmpl b/dot_zshrc.tmpl index 4e0f19a..a8841d3 100644 --- a/dot_zshrc.tmpl +++ b/dot_zshrc.tmpl @@ -63,73 +63,6 @@ function brew() { return $ret } -# Lazy pyenv shell integration (interactive only) -# - Keep startup fast by avoiding `pyenv init` during shell start. -# - Load `pyenv-virtualenv` integration only when Python tooling is used. -if [[ -o interactive ]] && [[ -d "$HOME/.pyenv" ]]; then - _pyenv_find_up() { - local target="$1" - local dir="$PWD" - - while [[ -n "$dir" && "$dir" != "/" && "$dir" != "$HOME" ]]; do - [[ -e "$dir/$target" ]] && return 0 - dir="${dir:h}" - done - - [[ -e "$dir/$target" ]] && return 0 - return 1 - } - - _pyenv_lazy_init() { - # Guard against re-entrancy. - if [[ -n "${_PYENV_LAZY_INIT_DONE:-}" ]]; then - return 0 - fi - _PYENV_LAZY_INIT_DONE=1 - - # Load full shell integration. - # Use `command` to ensure we call the pyenv binary (not a shell function). - eval "$(command pyenv init - zsh)" - eval "$(command pyenv virtualenv-init - zsh)" - - # Remove hook + helpers to keep runtime overhead at ~0. - autoload -U add-zsh-hook - add-zsh-hook -d preexec _pyenv_lazy_preexec - typeset -ga precmd_functions - precmd_functions=(${precmd_functions:#_pyenv_lazy_precmd}) - unfunction _pyenv_lazy_precmd _pyenv_lazy_preexec _pyenv_lazy_init _pyenv_find_up 2>/dev/null - } - - _pyenv_lazy_precmd() { - # Initialize before the first prompt if the current directory (or an ancestor) - # indicates that pyenv integration is needed. - # - # - `.python-version` enables pyenv local versions / pyenv-virtualenv activation. - # - `.envrc` is a common driver via direnv (`use pyenv ...`), which would otherwise - # call `pyenv shell` without shell integration and print warnings. 
- if _pyenv_find_up ".python-version" || _pyenv_find_up ".envrc"; then - _pyenv_lazy_init - fi - } - - _pyenv_lazy_preexec() { - # Only trigger on commands that plausibly need Python/pyenv. - local cmd="$1" - case "$cmd" in - pyenv*|python*|pip*|poetry*|pipx*|pytest*|tox*|uv*|ruff*|mypy*) - _pyenv_lazy_init - ;; - esac - } - - autoload -U add-zsh-hook - typeset -ga precmd_functions - if (( ${precmd_functions[(I)_pyenv_lazy_precmd]} == 0 )); then - precmd_functions=(_pyenv_lazy_precmd $precmd_functions) - fi - add-zsh-hook preexec _pyenv_lazy_preexec -fi - # Fast PATH additions (no expensive operations) if command -v cargo &>/dev/null; then export PATH="$PATH:$HOME/.cargo/bin" @@ -162,7 +95,7 @@ fi [[ -x "$(command -v pip)" && ( ! -f "$HOME/.zfunc/_pip" || -n "$(find "$HOME/.zfunc/_pip" -mtime +7 2>/dev/null)" ) ]] && pip completion --zsh > "$HOME/.zfunc/_pip" 2>/dev/null # pipx uses argcomplete - cache the generated completion -[[ -x "$(command -v pipx)" && ( ! -f "$HOME/.zfunc/_pipx" || -n "$(find "$HOME/.zfunc/_pipx" -mtime +7 2>/dev/null)" ) ]] && register-python-argcomplete pipx > "$HOME/.zfunc/_pipx" 2>/dev/null +[[ -x "$(command -v pipx)" && -x "$(command -v register-python-argcomplete)" && ( ! 
-f "$HOME/.zfunc/_pipx" || -n "$(find "$HOME/.zfunc/_pipx" -mtime +7 2>/dev/null)" ) ]] && register-python-argcomplete pipx > "$HOME/.zfunc/_pipx" 2>/dev/null # Load dotnet completions if available if command -v dotnet &>/dev/null; then diff --git a/installer/internal/config/packagemap.yaml b/installer/internal/config/packagemap.yaml index e226a9b..5890658 100644 --- a/installer/internal/config/packagemap.yaml +++ b/installer/internal/config/packagemap.yaml @@ -101,3 +101,6 @@ packages: name: jq dnf: name: jq + uv: + brew: + name: uv diff --git a/installer/internal/config/tools.yaml b/installer/internal/config/tools.yaml index b2b0e70..38b88de 100644 --- a/installer/internal/config/tools.yaml +++ b/installer/internal/config/tools.yaml @@ -15,3 +15,5 @@ tools: description: "Structural diff tool" - name: jq description: "Lightweight command-line JSON processor" + - name: uv + description: "Fast Python package and project manager" diff --git a/installer/lib/packageresolver/integration_test.go b/installer/lib/packageresolver/integration_test.go index fccd9b8..4e6a220 100644 --- a/installer/lib/packageresolver/integration_test.go +++ b/installer/lib/packageresolver/integration_test.go @@ -115,6 +115,47 @@ func Test_LoadPackageMappings_CanLoadFromEmbeddedConfig(t *testing.T) { // We don't assert specific content since it might change, but verify it loads } +func Test_Resolver_ResolvesUVForBrewOnly(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test") + } + + v := viper.New() + mappings, err := packageresolver.LoadPackageMappings(v, "") + require.NoError(t, err) + + sysInfo := &compatibility.SystemInfo{ + OSName: "darwin", + DistroName: "macOS", + Arch: "arm64", + } + + brewPM := &pkgmanager.MoqPackageManager{ + GetInfoFunc: func() (pkgmanager.PackageManagerInfo, error) { + return pkgmanager.PackageManagerInfo{Name: "brew"}, nil + }, + } + brewResolver, err := packageresolver.NewResolver(mappings, brewPM, sysInfo) + require.NoError(t, err) + + 
result, err := brewResolver.Resolve("uv", "") + require.NoError(t, err) + require.Equal(t, "uv", result.Name) + require.Nil(t, result.VersionConstraints) + + aptPM := &pkgmanager.MoqPackageManager{ + GetInfoFunc: func() (pkgmanager.PackageManagerInfo, error) { + return pkgmanager.PackageManagerInfo{Name: "apt"}, nil + }, + } + aptResolver, err := packageresolver.NewResolver(mappings, aptPM, sysInfo) + require.NoError(t, err) + + _, err = aptResolver.Resolve("uv", "") + require.Error(t, err) + require.Contains(t, err.Error(), "no package mapping found") +} + func Test_LoadPackageMappings_HandlesLargeConfigFile(t *testing.T) { if testing.Short() { t.Skip("skipping integration test") diff --git a/private_dot_local/bin/executable_pyenv-shell b/private_dot_local/bin/executable_pyenv-shell deleted file mode 100644 index 087adae..0000000 --- a/private_dot_local/bin/executable_pyenv-shell +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env sh - -# Shim for `pyenv shell` when shell integration isn't loaded. -# -# Some tools invoke `/opt/homebrew/bin/pyenv shell ...` directly. The pyenv -# dispatcher expects shell integration for `pyenv shell` and otherwise prints: -# "pyenv: shell integration not enabled..." -# -# This shim provides a `pyenv-shell` command so `pyenv shell ...` no longer -# errors. When stdout is a TTY (interactive terminal), it stays silent to avoid -# polluting startup. When stdout isn't a TTY (typically used with `eval "$(...)"`), -# it delegates to `pyenv sh-shell` which prints the required export/unset code. - -if [ -t 1 ]; then - exit 0 -fi - -exec pyenv sh-shell "$@" From b6ed5659cd8c3f890a96724343b71ad91ada369d Mon Sep 17 00:00:00 2001 From: Timor Gruber Date: Sat, 9 May 2026 15:28:11 +0300 Subject: [PATCH 04/13] put proportionality guardrails on docs skills The recent feature tried to make itself the center of the solution, but it should've been treated merely as an example. 
--- dot_agents/skills/documenting-architecture/SKILL.md | 2 ++ dot_agents/skills/documenting-business-processes/SKILL.md | 2 ++ dot_agents/skills/documenting-components/SKILL.md | 2 ++ dot_agents/skills/documenting-domain/SKILL.md | 2 ++ dot_agents/skills/documenting-library-guides/SKILL.md | 2 ++ 5 files changed, 10 insertions(+) diff --git a/dot_agents/skills/documenting-architecture/SKILL.md b/dot_agents/skills/documenting-architecture/SKILL.md index b035233..1b82ef1 100644 --- a/dot_agents/skills/documenting-architecture/SKILL.md +++ b/dot_agents/skills/documenting-architecture/SKILL.md @@ -14,6 +14,7 @@ Document how a system is designed: its layers, boundaries, communication pattern 3. **Accurate to the Code**: Document the architecture as it IS, not as it was designed to be. If the code has drifted from the original vision, document reality. 4. **No Duplication Across Levels**: Architecture docs reference domain concepts — they never redefine business terms. If you need to mention a domain entity, link to the domain docs. Similarly, don't describe individual component internals — that belongs in component docs. 5. **Discover, Don't Assume**: Read the code to understand the actual architecture. Don't project patterns onto it based on naming alone. +6. **Proportionality To Document Purpose**: A feature can require architecture doc updates without becoming the architecture's center of gravity. Preserve the target doc's existing abstraction level and emphasis. Mention feature-specific tools, libraries, or commands only where they change durable boundaries, dependencies, or decisions. ## Documentation Hierarchy @@ -133,5 +134,6 @@ See `docs/architecture/.md` for . 
- **Never describe component internals** — that belongs in component docs; architecture describes how components relate - **Document reality, not aspirations** — if the code doesn't match the intended architecture, document what exists and note the drift - **Rationale is mandatory** — every design decision must include "why", not just "what" +- **Preserve proportional emphasis** — broad architecture docs should stay broad. If a feature only changes one dependency, guard, or integration point, update that specific claim instead of re-centering the document around the feature. - **Use reference-style links** — when linking to other docs or source files, use reference links (`[text][ref]` with `[ref]: path` at the bottom of the file) rather than inline links. They read better in source and are easier to maintain. - **Propose structure first** — if no architecture docs exist yet, propose a directory structure and format before creating files diff --git a/dot_agents/skills/documenting-business-processes/SKILL.md b/dot_agents/skills/documenting-business-processes/SKILL.md index 42016a5..cd55730 100644 --- a/dot_agents/skills/documenting-business-processes/SKILL.md +++ b/dot_agents/skills/documenting-business-processes/SKILL.md @@ -14,6 +14,7 @@ Document the business processes and workflows a system implements: how domain en 3. **NOT Development Processes**: This skill documents business workflows the system implements (user registration, order fulfillment). Development processes (how to test, deploy, contribute) belong in skills and contribution guidelines — not here. 4. **No Duplication Across Levels**: Reference domain docs for entity definitions and architecture docs for system structure. Process docs describe the FLOW — how entities traverse the architecture. Don't redefine domain terms or re-explain architectural patterns. 5. **Include Failure Paths**: Happy paths are easy. 
Document what happens when things go wrong — failed payments, validation errors, timeouts, partial completions. +6. **Proportionality To Process Scope**: A feature can change one step of a process without becoming the whole process. Preserve the existing process's trigger, actors, and end-to-end emphasis. Mention feature-specific tools or mechanisms only where they affect the flow, decisions, outcomes, or failure handling. ## Documentation Hierarchy @@ -163,6 +164,7 @@ See `docs/processes/.md` for . - **Never redefine domain concepts or architectural patterns** — reference the appropriate docs - **Always include failure paths** — happy-path-only docs are incomplete and misleading - **Business language first** — describe what happens from the business perspective, then note which components are involved +- **Preserve proportional emphasis** — if updating an existing broad process doc, adjust the affected steps and failure scenarios without turning the process into a feature-specific walkthrough. - **Use reference-style links** — when linking to other docs or source files, use reference links (`[text][ref]` with `[ref]: path` at the bottom of the file) rather than inline links. They read better in source and are easier to maintain. - **Decompose complex steps into sub-processes** — if a step has its own decision branches, failure modes, or multiple sequential actions, it needs its own doc. Don't inline what should be a sub-process. - **Match existing sub-process patterns** — if a parent doc already has sub-process links, new sub-processes must follow the exact same structure and link style diff --git a/dot_agents/skills/documenting-components/SKILL.md b/dot_agents/skills/documenting-components/SKILL.md index 64d70a6..1297d34 100644 --- a/dot_agents/skills/documenting-components/SKILL.md +++ b/dot_agents/skills/documenting-components/SKILL.md @@ -15,6 +15,7 @@ Create focused, accurate documentation for specific components or areas of a cod 4. 
**Accurate Over Comprehensive**: Only document what you can verify from the code. Never invent or assume behavior. 5. **Discover, Don't Assume**: Find the project's existing documentation structure before creating new files. 6. **No Duplication Across Levels**: Before documenting a concept, check if it already exists at a higher documentation level (domain, architecture, or business processes). If it does, **reference it** instead of redefining it. Component docs are the lowest layer — they reference all layers above. +7. **Proportionality To Component Scope**: Document the component's durable responsibilities, interfaces, and gotchas. Do not let a recent feature dominate the component doc unless it fundamentally changes the component's purpose or primary API. ## Documentation Hierarchy @@ -111,5 +112,6 @@ After documentation is created, future planning sessions and executing agents be - **Match existing doc style** — don't introduce a new format if docs already exist - **Propose, don't force** — if unsure about structure, location, or scope, ask the user - **Keep it maintainable** — shorter accurate docs beat comprehensive stale docs +- **Preserve proportional emphasis** — when updating existing component docs for a feature, change the relevant section only. Avoid converting component documentation into a feature changelog. - **Use reference-style links** — when linking to other docs or source files, use reference links (`[text][ref]` with `[ref]: path` at the bottom of the file) rather than inline links. They read better in source and are easier to maintain. 
- **No meta-commentary** — don't add "this doc was auto-generated" or session timestamps diff --git a/dot_agents/skills/documenting-domain/SKILL.md b/dot_agents/skills/documenting-domain/SKILL.md index bceea9f..c2d9d14 100644 --- a/dot_agents/skills/documenting-domain/SKILL.md +++ b/dot_agents/skills/documenting-domain/SKILL.md @@ -14,6 +14,7 @@ Document the business domain of a project: its terminology, entities, relationsh 3. **Accurate Over Comprehensive**: Only document domain concepts that actually exist in the codebase. Don't invent domain models the project doesn't implement. 4. **Non-Technical Where Possible**: Domain docs should be readable by non-technical stakeholders. Minimize code references — save those for component docs. 5. **No Duplication Across Levels**: Domain docs are the canonical source for business concepts. Architecture, process, and component docs **reference** domain docs — they never redefine domain terms. If you find a concept documented elsewhere, consolidate it here and replace the duplicate with a reference. +6. **Proportionality To Document Purpose**: A feature can motivate a doc update without becoming the doc's center of gravity. Preserve the document's existing purpose, audience, abstraction level, and relative emphasis. Mention feature-specific tools or mechanisms only as much as the domain concept requires. ## Documentation Hierarchy @@ -111,5 +112,6 @@ See `docs/domain/.md` for . - **One definition per concept** — if a term is already defined in domain docs, other layers must reference it, not redefine it - **Defer process details to process docs** — if a concept involves a multi-step flow (loading chain, resolution sequence, initialization steps), define the concept here and link to the process doc for the "how" - **No behavioral conditionals** — if you're writing "when X happens", "in Y mode", or "if Z flag is set", that's a process description. 
Domain docs define static properties of concepts (what they are, what they contain, how they relate). Conditional behavior, modes, flags, and runtime decisions belong in process docs. +- **Preserve proportional emphasis** — if updating an existing broad domain doc for a narrow feature, make the smallest accurate change. Do not let the feature's terminology dominate the domain doc unless the feature actually changes the domain's primary language. - **Use reference-style links** — when linking to other docs or source files, use reference links (`[text][ref]` with `[ref]: path` at the bottom of the file) rather than inline links. They read better in source and are easier to maintain. - **Propose structure first** — if no domain docs exist yet, propose a directory structure and format to the user before creating files diff --git a/dot_agents/skills/documenting-library-guides/SKILL.md b/dot_agents/skills/documenting-library-guides/SKILL.md index c665d4c..a837e35 100644 --- a/dot_agents/skills/documenting-library-guides/SKILL.md +++ b/dot_agents/skills/documenting-library-guides/SKILL.md @@ -14,6 +14,7 @@ Create user-facing documentation for libraries — the docs users actually read. 3. **Connected to the Code**: Every example must come from the actual codebase — real types, real method signatures, real return values. Never write pseudocode or hypothetical examples. If the API changes, the docs should feel immediately stale so they get updated. 4. **Site-Ready Structure**: Organize docs as if they'll be served by a static site generator (Starlight, MkDocs, Docusaurus). Each page stands alone but fits a reading order. Sidebar-friendly hierarchy, clear page boundaries, cross-page linking. 5. **Adjacent, Not Redundant**: These docs complement inline code documentation — they don't replace it. Inline docs describe the API surface; library guides describe how to *use* it to get things done. Never duplicate what a godoc/rustdoc/typedoc already provides. +6. 
**Proportionality To Reader Journey**: A new feature can update guide examples without becoming the whole guide. Preserve each page's original reader goal and level of detail unless the feature changes that goal. ## Documentation Structure @@ -104,3 +105,4 @@ After writing: - **Keep getting-started ruthlessly short** — the reader's patience is at its lowest here; get them to "it works" fast - **Use reference-style links** — when linking between pages or to source files, use reference links for readability and maintainability - **Match the library's terminology** — use the same names the API uses, don't invent synonyms +- **Preserve proportional emphasis** — update feature-relevant examples and explanations without turning broad guides into release notes or single-feature walkthroughs. From f5fe724972281c17b8d328e261fc686a3669ce9d Mon Sep 17 00:00:00 2001 From: Timor Gruber Date: Sat, 9 May 2026 16:40:05 +0300 Subject: [PATCH 05/13] enhance planning-from-rfc skill to be as good as direct planning When the split happened, the model was lazy with the rfc variant and simply deferred to the direct variant; in practice, that caused the agent to get confused and produce sub-par results. This change also improves how OpenCode spawns subagents inline, using OpenCode's own recommended way instead of some hallucinated one.
--- .../planning-project-features-direct/SKILL.md | 38 ++- .../assets/master-plan-template.md | 13 +- .../references/runtime-claude.md | 8 + .../references/runtime-codex.md | 8 + .../references/runtime-opencode.md | 18 +- .../SKILL.md | 307 +++++++++++++++--- .../assets/master-plan-template.md | 62 +++- .../assets/sub-plan-template.md | 52 ++- .../references/runtime-claude.md | 14 + .../references/runtime-codex.md | 14 + .../references/runtime-opencode.md | 36 +- 11 files changed, 478 insertions(+), 92 deletions(-) diff --git a/dot_agents/skills/planning-project-features-direct/SKILL.md b/dot_agents/skills/planning-project-features-direct/SKILL.md index d735b34..189e550 100644 --- a/dot_agents/skills/planning-project-features-direct/SKILL.md +++ b/dot_agents/skills/planning-project-features-direct/SKILL.md @@ -15,15 +15,15 @@ This skill has one canonical workflow. Runtime files only map that workflow to t Before doing any work, determine the active runtime and read exactly one adapter: -- **OpenCode runtime** → [references/runtime-opencode.md](references/runtime-opencode.md) -- **Codex runtime** → [references/runtime-codex.md](references/runtime-codex.md) -- **Claude runtime** → [references/runtime-claude.md](references/runtime-claude.md) +- **OpenCode runtime** -> [references/runtime-opencode.md](references/runtime-opencode.md) +- **Codex runtime** -> [references/runtime-codex.md](references/runtime-codex.md) +- **Claude runtime** -> [references/runtime-claude.md](references/runtime-claude.md) -**Determining the active runtime**: Check the system prompt and environment banner for identifying markers (e.g., "OpenCode", "Claude Code", "Codex CLI"). If the signal is ambiguous, ask the user rather than guessing — reading the wrong adapter silently breaks assumptions downstream. +**Determining the active runtime**: Check the system prompt and environment banner for identifying markers (e.g., "OpenCode", "Claude Code", "Codex CLI"). 
If the signal is ambiguous, ask the user rather than guessing. -Do not load or mix instructions from the other runtime adapter in the same turn. If a runtime adapter conflicts with this file, this file is authoritative. +Do not load or mix instructions from another runtime adapter in the same turn. If a runtime adapter conflicts with this file, this file is authoritative. -**Terminology bridge**: This skill uses runtime-neutral terms. Claude's runtime calls execution bindings "worker agent definitions"; Codex's runtime calls them "dispatch recipes"; OpenCode's runtime uses custom subagent definitions. Reviewer bindings follow the same pattern. Use whichever term is native to the active runtime when writing or reading concrete artifacts; the canonical workflow terms are used only in this file. +**Terminology bridge**: This skill uses runtime-neutral terms. Claude's runtime calls execution bindings "worker agent definitions"; Codex's runtime calls them "dispatch recipes"; OpenCode's runtime uses custom subagent definitions. Reviewer bindings follow the same pattern. Use whichever term is native to the active runtime when writing or reading concrete artifacts. ## Core Principles @@ -100,7 +100,7 @@ This is the most critical phase. Break the feature into sub-plans: Present the decomposition to the user for review before writing the actual plan files. -### Phase 4: Plan Creation, Reviewer Assignment & Model Selection +### Phase 4: Plan Creation, Reviewer Assignment, Model Selection, And Execution Binding Only after Phases 1-3 are complete: @@ -154,21 +154,23 @@ When in doubt, prefer one tier up — the cost of a wrong model choice is rework Document the recommendation in each sub-plan's `## Execution Model` field with a brief rationale. -6. **Establish execution bindings for execution**: Each sub-plan's model + skills combination needs a matching runtime-specific execution binding. 
The active runtime adapter defines what that binding looks like — for example, a persistent worker definition, a reusable dispatch recipe, or another runtime-native mechanism. Natural language alone is not a reliable way to control model selection or preload the right skills. +6. **Establish execution bindings for execution**: Each sub-plan's model + skills combination needs a matching runtime-specific execution binding. The active runtime adapter defines what that binding looks like. Natural language alone is not a reliable way to control model selection or preload the right skills. - **Search for existing execution bindings**: Check the locations and mechanisms defined by the active runtime adapter. A binding is a match if it covers the sub-plan's required model tier and skill set. A partial match (correct model but incomplete skills, or correct skills but different model) can serve as a basis for an updated binding — adapt rather than starting from scratch. + **Search for existing execution bindings**: Check the locations and mechanisms defined by the active runtime adapter. A binding is a match if it covers the sub-plan's required model tier and skill set. A partial match can serve as a basis for an updated binding. - **Establish missing execution bindings**: If no matching binding exists, establish one using the mechanism defined by the active runtime adapter. If the runtime uses persistent bindings, create the required artifact. If the runtime uses ephemeral bindings, record the binding parameters in the runtime-specific way so retries and resumed execution use the same model and skills. The binding must make the sub-plan's model choice and required skills explicit enough that execution does not depend on prompt inference. + **Establish missing execution bindings**: If no matching binding exists, establish one using the mechanism defined by the active runtime adapter. If the runtime uses persistent bindings, create the required artifact. 
If the runtime uses ephemeral bindings, record the binding parameters in the runtime-specific way so retries and resumed execution use the same model and skills. - **Naming and placement**: Follow the active runtime adapter's conventions so the binding is discoverable by that runtime. - **Model control**: Set the target model using the runtime's actual model-selection mechanism. - **Skill preload**: Make the required skills explicit using the runtime's actual skill-loading mechanism. - **Identity/prompt**: Keep the binding itself minimal. The sub-plan provides the task context; the binding provides model, skills, and runtime-native agent identity. - **Create a test author binding**: If any sub-plan has testable acceptance criteria, create a single test author binding for the project. It always uses the **most capable model** — the task is finite (write tests from acceptance criteria, confirm they fail) and critical enough to justify the investment. Preload the project's testing and code-writing skills, plus `test-driven-development` if it's available. All sub-plans with testable AC share this single binding. + **Create a test author binding**: If any sub-plan has testable acceptance criteria, create or reuse one shared project-local test-author binding at the most capable model tier. Preload the project's testing and code-writing skills, plus `test-driven-development` if available. All sub-plans with testable AC share this single binding. **Warn the user**: If the runtime adapter says newly established persistent bindings require discovery, reload, or session restart before they become available, note that when presenting the plan. +7. **Write lead-agent instructions into the master plan**: Multi-sub-plan plans must include explicit worker-dispatch instructions, coordination points, file ownership, cross-sub-plan data flow, and post-execution checks. The executor should not have to infer these mechanics from the planning skill. 
+ ### Phase 5: Initial Review Loop After plan creation and reviewer assignment, run an iterative review process **once, before the user sees the plan**. The loop continues until all reviewers report no new findings. This is the only automatic plan review — post-feedback revisions follow a lighter process (see Phase 6). @@ -238,14 +240,15 @@ Before presenting, run a final consistency check: 1. **Model assignments** — verify each sub-plan's execution model matches the decision tree and any explicit rules 2. **Skill conformance** — verify that reviewer-driven changes haven't introduced patterns that contradict required skills listed in each sub-plan -3. **Cross-sub-plan prerequisites** — verify that every interface, type, or file referenced in a sub-plan's Prerequisites section is created by an earlier sub-plan in the dependency graph -4. **Integration contract integrity** — verify that every "Produces" entry in one sub-plan has a matching "Consumes" entry in another (and vice versa), that the master plan's data flow table covers all cross-boundary data paths, and that interface wiring is assigned to a specific sub-plan when methods are accessed through interfaces -5. **DAG validity** — verify the execution order forms a valid DAG: no cycles, no sub-plan depends on output from a later or same-group sub-plan, and every "Consumes" reference points to a sub-plan that completes before the consumer starts -6. **Anchor boundaries** — if an active feature anchor exists, verify the plan does not duplicate anchor content and that any sub-plan using `anchoring-context` has a feature-level reason to update it +3. **Execution bindings** — verify every multi-sub-plan worker assignment has a runtime-specific binding and lead-agent dispatch instructions +4. **Cross-sub-plan prerequisites** — verify that every interface, type, or file referenced in a sub-plan's Prerequisites section is created by an earlier sub-plan in the dependency graph +5. 
**Integration contract integrity** — verify that every "Produces" entry in one sub-plan has a matching "Consumes" entry in another (and vice versa), that the master plan's data flow table covers all cross-boundary data paths, and that interface wiring is assigned to a specific sub-plan when methods are accessed through interfaces +6. **DAG validity** — verify the execution order forms a valid DAG: no cycles, no sub-plan depends on output from a later or same-group sub-plan, and every "Consumes" reference points to a sub-plan that completes before the consumer starts +7. **Anchor boundaries** — if an active feature anchor exists, verify the plan does not duplicate anchor content and that any sub-plan using `anchoring-context` has a feature-level reason to update it Fix any inconsistencies before proceeding. -Present the fully reviewed plan (master + sub-plans) along with a summary of review findings and how they were addressed. Only mark as ready when the user explicitly approves. +Present the fully reviewed plan (master + sub-plans) along with a review summary, a worker-dispatch summary, and an explanation of how findings were addressed. Only mark as ready when the user explicitly approves. **Remind the user**: The plan intentionally omits implementation details — those are the executing agent's responsibility, guided by loaded skills. The user reviews architecture and constraints now; they review actual code after execution. This is by design, not a gap. @@ -297,6 +300,7 @@ The documentation sub-plan follows the standard sub-plan template but its implem - **Which existing docs to update** — file paths, which sections, what to change - **Which new docs to create** — file paths, which existing doc to use as a structural pattern, what the new doc should cover - **Structural pattern matching** — if existing docs follow a pattern (e.g., process steps link to sub-process docs), new additions must follow it. Specify the pattern explicitly.
+- **Proportionality guard** — the target document's purpose, audience, and existing abstraction level outrank feature-local emphasis. Specify whether the feature should be described as a primary concept, a small example, or only as an implementation detail. Do not let a narrow feature become the center of a broad domain, architecture, or process doc unless the feature actually changes that doc's central subject. - **Required skills**: List the documenting skills the executing agent needs (e.g., `documenting-business-processes` for new process docs, `documenting-domain` for new domain entries) - **Execution model**: Always assign the most capable available model. Documentation requires understanding the full feature context, making judgments about what to include, and producing clear prose — this is not rote work. @@ -310,7 +314,7 @@ Skip the documentation sub-plan when: ## Rules (Non-Negotiable) - **Always respect model assignments during execution** — Sub-plan model assignments are deliberate cost-optimization decisions. When executing a plan, the assigned model MUST be used via the active runtime's actual model-selection mechanism. If a sub-agent fails at the assigned model, diagnose and fix the failure (e.g., permission mode, tool access). Never silently fall back to executing the work on a more expensive model. If the issue cannot be resolved, stop and ask the user how to proceed. -- **Use runtime-specific execution bindings for multi-plan execution** — When a plan has 2+ sub-plans, dispatch each sub-plan through its assigned execution binding from Phase 4 step 6. That binding is the reliable mechanism for controlling model selection and skill preload; prompt wording alone is not. Run independent sub-plans in parallel where the runtime supports it. The lead coordinates handoffs between sequential sub-plans by relaying information (sub-agents cannot communicate with each other). 
If a binding fails, diagnose and retry — do not silently execute on the main agent or fall back to a more expensive model. If the issue cannot be resolved, STOP and ask the user. +- **Use runtime-specific execution bindings for multi-sub-plan execution** — When a plan has two or more sub-plans, dispatch each sub-plan through its assigned execution binding from Phase 4 step 6. That binding is the reliable mechanism for controlling model selection and skill preload; prompt wording alone is not. Run independent sub-plans in parallel where the runtime supports it. The lead coordinates handoffs between sequential sub-plans by relaying information (sub-agents cannot communicate with each other). If a binding fails, diagnose and retry once. If it still cannot be resolved, stop and ask the user. Never silently execute on the coordinator or a more expensive model. - **Planner owns review output** — The planner passes the review output file path to each reviewer. Write-capable reviewers write the file directly; read-only reviewers return findings as their response, and the planner writes the file on their behalf. The planner checks whether the file exists after the reviewer finishes. - **Never write a plan based on incomplete information** - **Never invent requirements the user didn't specify** diff --git a/dot_agents/skills/planning-project-features-direct/assets/master-plan-template.md b/dot_agents/skills/planning-project-features-direct/assets/master-plan-template.md index 250905e..137f64b 100644 --- a/dot_agents/skills/planning-project-features-direct/assets/master-plan-template.md +++ b/dot_agents/skills/planning-project-features-direct/assets/master-plan-template.md @@ -32,13 +32,13 @@ The master plan is the orchestration document. It does NOT contain implementatio ## Execution via Worker Agents -**Worker agents are REQUIRED for plans with 2+ sub-plans.** Each sub-plan's model + skills combination maps to a worker agent definition (created during Phase 4). 
Worker agents are the only reliable mechanism for controlling sub-agent model selection — model requests via natural language prompts or team configuration are unreliable. +**Worker agents are REQUIRED for plans with 2+ sub-plans.** Each sub-plan's model + skills combination maps to a worker agent definition or dispatch recipe created during planning. Worker agents are the only reliable mechanism for controlling sub-agent model selection; model requests via natural language prompts or team configuration are unreliable. **The only exception** — skip worker agents when: - Single sub-plan (just execute directly) - All sub-plans are trivially small (e.g., "add one import") -**Worker Agents** (created during planning): +**Worker Agents**: | Sub-Plan | Implementer Worker | Test Author Worker | Model Tier | |----------|-------------------|-------------------|------------| | 01 | `--worker` | `-test-author-worker` | | @@ -65,11 +65,14 @@ The master plan is the orchestration document. It does NOT contain implementatio **Lead Agent Instructions**: - Use this master plan as the roadmap -- Spawn each sub-plan's assigned worker agent from the table above -- Run sub-plans in the same parallel group concurrently where the framework supports it +- Before editing implementation files, initialize or resume `progress.md` and fill the execution audit with planned workers, model tiers, dispatch mechanisms, and TDD gate status +- Spawn each sub-plan's assigned worker agent from the table above using the active runtime adapter's dispatch mechanism. 
Do not self-execute assigned worker tasks in the coordinator context +- Run sub-plans in the same parallel group concurrently where the runtime supports it and file ownership does not conflict - For sequential dependencies, wait for the prior worker to complete before spawning the next - The lead relays information between workers when needed (workers cannot communicate directly) -- Pass the sub-plan file path and any prerequisite context when spawning +- Pass the sub-plan file path and any prerequisite context when spawning implementer workers +- For test-author workers, pass only acceptance criteria and code-surface context through an isolated workspace; do not pass plan paths, feature names, or design rationale. Same-workspace subagent invocation is not enough for structural TDD unless the runtime can prove it routes the worker into the isolated workspace. +- If a worker binding, model assignment, or TDD isolation mechanism cannot be used, diagnose and retry once. If it still cannot be used, stop and ask the user rather than falling back to coordinator execution or a different model tier - Synthesize results when all sub-plans finish **Coordination Points**: diff --git a/dot_agents/skills/planning-project-features-direct/references/runtime-claude.md b/dot_agents/skills/planning-project-features-direct/references/runtime-claude.md index c23d629..1eceae0 100644 --- a/dot_agents/skills/planning-project-features-direct/references/runtime-claude.md +++ b/dot_agents/skills/planning-project-features-direct/references/runtime-claude.md @@ -55,6 +55,14 @@ Claude execution bindings are **file-defined worker agents** under the agent dir - Re-run only affected reviewers during convergence; do not restart the full review. - The planner is responsible for checking that each expected review artifact was actually created, regardless of whether the reviewer wrote it directly or returned findings. 
+## Execution Dispatch + +Direct feature plans with two or more sub-plans must include concrete lead-agent instructions and worker tables in the master plan. During execution, launch the assigned Claude worker agents rather than recreating their persona in prompt text. Do not rely on prompt wording to pick the right model, and do not let the coordinator execute a sub-plan directly when the plan assigned a worker or model tier. + +## TDD Isolation Mechanics + +If any sub-plan has testable acceptance criteria, the shared test-author worker must be paired with an isolation mechanism. Prefer Worktrunk (`wt`) when available, then Claude's native worktree mechanism, then `git worktree`. If isolation cannot be verified, the plan must say that structural TDD is blocked or explicitly skipped with a concrete reason; generic "runtime cannot isolate" language is not sufficient when a worktree plus worker dispatch path is available. + ## Model Assignment - Use the worker or reviewer definition's explicit `model` field as the source of truth. diff --git a/dot_agents/skills/planning-project-features-direct/references/runtime-codex.md b/dot_agents/skills/planning-project-features-direct/references/runtime-codex.md index b329505..e718666 100644 --- a/dot_agents/skills/planning-project-features-direct/references/runtime-codex.md +++ b/dot_agents/skills/planning-project-features-direct/references/runtime-codex.md @@ -50,6 +50,14 @@ This adapter maps the canonical planning workflow in `../SKILL.md` to Codex-nati - Pass only the plan path, the review output location, and the review task. Do not mix in unrelated planning rationale. - When re-reviewing after changes, re-run only the affected reviewers, matching the canonical workflow. +## Execution Dispatch + +Direct feature plans with two or more sub-plans must include concrete lead-agent instructions and worker/dispatch-recipe tables in the master plan.
During execution, use Codex's actual worker dispatch mechanism with the recipe's explicit model and skills. Do not rely on prompt text alone to pick the right model, and do not let the coordinator execute a sub-plan directly when the plan assigned a worker or model tier. + +## TDD Isolation Mechanics + +If any sub-plan has testable acceptance criteria, the test-author dispatch recipe must be paired with an isolation mechanism. Prefer `wt` when available in the project workflow, then dispatch the test author into the isolated workspace. If this cannot be verified, the plan must say that structural TDD is blocked or explicitly skipped with a concrete reason; generic "runtime cannot isolate" language is not sufficient when a worktree plus worker dispatch path is available. + ## Model Assignment - Use Codex's explicit model-selection mechanism in the reviewer or worker dispatch path rather than prompt-only requests. diff --git a/dot_agents/skills/planning-project-features-direct/references/runtime-opencode.md b/dot_agents/skills/planning-project-features-direct/references/runtime-opencode.md index 23bc422..e10b386 100644 --- a/dot_agents/skills/planning-project-features-direct/references/runtime-opencode.md +++ b/dot_agents/skills/planning-project-features-direct/references/runtime-opencode.md @@ -17,7 +17,7 @@ This adapter maps the canonical planning workflow in `../SKILL.md` to OpenCode-n - Project-local OpenCode agents: `.opencode/agents/` within the repository - Global OpenCode agents: `~/.config/opencode/agents/` -**Dotfile-source note**: If the project itself is a chezmoi source directory or another dotfile source repo, use the source-path equivalents when creating global artifacts (for example `dot_config/opencode/agents/` rather than editing `~/.config/opencode/agents/` directly). 
+**Dotfile-source note**: If the project itself is a chezmoi source directory or another dotfile source repo, use the source-path equivalents when creating global artifacts (for example `private_dot_config/opencode/agents/` rather than editing `~/.config/opencode/agents/` directly). **Precedence**: When the same agent name exists in both places, project-local wins. When searching for reusable reviewer or execution bindings, check project-local first, then global. @@ -76,6 +76,22 @@ Do not rely on prompt text alone to pick the right model, and do not rely on the - Re-run only affected reviewers during convergence; do not restart the full review. - The planner remains responsible for checking that each expected review artifact was actually created. +## Execution Dispatch + +Direct feature plans with two or more sub-plans must include concrete lead-agent instructions and worker tables in the master plan. During execution, OpenCode workers are invoked through the runtime's native subagent mechanism. In interactive sessions, `@` mention is a valid native invocation path when it can invoke the named worker and preserve the worker's configured model and permissions. + +The CLI is an acceptable fallback when the current runtime surface cannot invoke project-local custom subagents directly, or when explicit workspace routing is required: + +```bash +opencode run --agent <worker-name> --dir <workspace-path> "<prompt>" +``` + +Do not rely on prompt text alone to pick the right model, and do not let the coordinator execute a sub-plan directly when the plan assigned a worker or model tier. + +## TDD Isolation Mechanics + +If any sub-plan has testable acceptance criteria, the test-author binding must be paired with an isolation mechanism. Prefer `wt` when available in the project workflow, then dispatch the test author into the isolated workspace. Same-workspace `@` invocation is not sufficient for structural TDD unless the runtime can prove it routes that subagent into the isolated worktree.
Acceptable routing includes a verified native isolated-workspace dispatch mechanism or `opencode run --agent <worker-name> --dir <workspace-path>`. If this cannot be verified, the plan must say that structural TDD is blocked or explicitly skipped with a concrete reason; generic "runtime cannot isolate" language is not sufficient when `wt` plus either native isolated-workspace dispatch or `opencode run --dir` is available. + ## Model Assignment - Use the custom subagent's explicit `model` field as the source of truth. diff --git a/dot_agents/skills/planning-project-features-from-rfc/SKILL.md b/dot_agents/skills/planning-project-features-from-rfc/SKILL.md index 9202ad2..2d9b226 100644 --- a/dot_agents/skills/planning-project-features-from-rfc/SKILL.md +++ b/dot_agents/skills/planning-project-features-from-rfc/SKILL.md @@ -1,11 +1,19 @@ --- name: planning-project-features-from-rfc -description: Create implementation plans from a reviewed RFC. Uses the RFC as the approved design baseline, decomposes it into executable sub-plans, and runs only RFC-fidelity and executability review. +description: Create implementation plans from a reviewed RFC. Uses the RFC as the approved design baseline, decomposes it into executable sub-plans, and runs RFC-specific plan review. --- # Planning Project Features From RFC -Create implementation plans from a reviewed RFC. The RFC owns the design; this skill owns decomposition, execution ordering, file ownership, acceptance criteria, model and skill assignment, and execution bindings. +Create thorough, actionable implementation plans for features within a single +project from a reviewed RFC. The RFC owns requirements, design decisions, +constraints, goals, non-goals, risks, and contracts. This skill owns the same +planning mechanics as direct feature planning: decomposition, dependency graph, +file ownership, acceptance criteria, required skills, model assignments, +execution bindings, and execution orchestration instructions.
+ +The RFC replaces direct planning's open-ended requirement gathering and design +review. It does not replace the direct planning workflow's execution discipline. ## Runtime Binding @@ -17,57 +25,117 @@ Before doing any work, determine the active runtime and read exactly one adapter - **Codex runtime** -> [references/runtime-codex.md](references/runtime-codex.md) - **Claude runtime** -> [references/runtime-claude.md](references/runtime-claude.md) -Do not load or mix instructions from another runtime adapter in the same turn. If a runtime adapter conflicts with this file, this file is authoritative. +**Determining the active runtime**: Check the system prompt and environment +banner for identifying markers (e.g., "OpenCode", "Claude Code", "Codex +CLI"). If the signal is ambiguous, ask the user rather than guessing. + +Do not load or mix instructions from another runtime adapter in the same turn. +If a runtime adapter conflicts with this file, this file is authoritative. + +**Terminology bridge**: This skill uses runtime-neutral terms. Claude's runtime +calls execution bindings "worker agent definitions"; Codex's runtime calls them +"dispatch recipes"; OpenCode's runtime uses custom subagent definitions. +Reviewer bindings follow the same pattern. Use whichever term is native to the +active runtime when writing or reading concrete artifacts. ## Core Principles 1. **RFC Baseline**: Treat the reviewed RFC as the approved source of design decisions, constraints, goals, non-goals, risks, and contracts. -2. **Planning Owns Mechanics**: The planner creates sub-plan boundaries, dependency graph, execution ordering, file ownership, acceptance criteria, required skills, model assignments, and execution bindings. -3. **No Design Re-Litigation**: Do not reopen RFC architecture, risk, or tradeoff decisions during planning. -4. **No Silent Deviations**: If planning requires changing the RFC design, stop and ask whether to revise the RFC or explicitly approve a plan deviation. 
-5. **Minimal Review**: Reviewed RFC-backed plans use only `plan-rfc-fidelity-reviewer` and `plan-executability-reviewer` before user approval. +2. **No Design Re-Litigation**: Do not reopen RFC architecture, risk, or tradeoff decisions during planning. +3. **No Silent Deviations**: If planning requires changing the RFC design, stop and ask whether to revise the RFC or explicitly approve a plan deviation. +4. **Direct-Planning Mechanics Remain**: RFC-backed planning uses the direct workflow's decomposition, model selection, execution binding, worker-dispatch, documentation sub-plan, and approval mechanics unless this file explicitly replaces a step for RFC reasons. +5. **Atomic Decomposition**: Break work into the smallest self-contained sub-plans possible. Each sub-plan should be executable in isolation. +6. **Embedded Context**: Each sub-plan includes everything an executing agent needs. The agent should not have to read the RFC, master plan, or other sub-plans to understand its assigned work. +7. **RFC-Specific Review**: Reviewed RFC-backed plans use `plan-rfc-fidelity-reviewer` and `plan-executability-reviewer`. The RFC's own architecture, risk, and clarity reviews replace direct planning's full design review loop. ## Workflow ### Phase 1: Validate RFC Baseline +Use this workflow only after `planning-project-features` has routed here because +a reviewed RFC exists and the user wants an RFC-backed plan. + Read the RFC and confirm: - `rfc-architect-reviewer` is `Passed` or `Passed with concerns`. - `rfc-risk-reviewer` is `Passed` or `Passed with concerns`. +- `rfc-clarity-reviewer` is `Passed`, `Passed with concerns`, or explicitly not required by the project. - No `Blocking` review status remains. - Any `Passed with concerns` item is compatible with planning and does not require a design decision before decomposition. If validation fails, stop and return to `planning-project-features` routing. Do not continue as RFC-backed planning. 
-Read the active feature anchor if one exists. The anchor may add handoff context, but it does not override the RFC. +Look for an active feature anchor first using project conventions, then at `docs/context/<feature>-anchor.md`. +If one exists, read it as supporting handoff context. +The anchor can explain intent, rejected alternatives, open questions, or teammate handoff context, but it does not +override the RFC. Any unresolved anchor question that conflicts with the RFC or affects decomposition, scope, +acceptance criteria, or contracts is a planning blocker; ask the user before continuing. + +### Phase 2: Planning-Focused Codebase Exploration + +Read existing documentation first, then inspect code only for planning mechanics. +Reading existing documentation is dramatically cheaper than re-exploring code from scratch. -### Phase 2: Planning-Focused Exploration +1. **Read existing docs first**: Check AGENTS.md for documentation pointers, then read relevant docs (domain, architecture, business processes, components). +2. **Explore code only for gaps**: Search for relevant code, patterns, and conventions that documentation and the RFC do not cover. +3. Confirm file paths and ownership boundaries needed for sub-plans. +4. Confirm existing interfaces, commands, schemas, config files, or runtime bindings named by the RFC. +5. Identify tests, packages, or verification scopes that acceptance criteria can reference. +6. **Identify required skills**: Determine which skills available in the active runtime the executing agent will need to follow project conventions correctly. Check the project's `AGENTS.md` for documented skill mappings, then use the active runtime adapter for exact discovery/loading mechanics. +7. **Flag documentation gaps**: If critical areas needed for the plan are undocumented, note them.
Recommend the appropriate documenting skill: + - Missing domain knowledge -> `documenting-domain` + - Missing architecture overview -> `documenting-architecture` + - Missing business workflow docs -> `documenting-business-processes` + - Missing component docs -> `documenting-components` -Read existing docs first, then inspect code only for planning mechanics: + Present gaps to the user. They may want to create docs before planning continues, or accept the gap and proceed. +8. **Flag RFC/specification gaps**: When implementation planning depends on behavior the RFC does not define, treat it as a blocking gap. Do not fill it with a reasonable default. Ask whether to revise the RFC or approve an explicit plan deviation. -- Confirm file paths and ownership boundaries needed for sub-plans. -- Confirm existing interfaces, commands, schemas, or config files named by the RFC. -- Identify required skills for execution. -- Identify tests, packages, or verification scopes that acceptance criteria can reference. +Do not use exploration to redesign the RFC. If exploration contradicts the RFC, +stop and ask whether to revise the RFC or abandon RFC-backed planning. -Do not use exploration to redesign the RFC. If exploration contradicts the RFC, stop and ask whether to revise the RFC or abandon RFC-backed planning. +Share planning-relevant findings with the user and confirm understanding before +writing plan files when the findings affect decomposition, sequencing, or scope. ### Phase 3: Decomposition -Break the RFC into sub-plans: +This is the most critical phase. Break the RFC into sub-plans: + +1. **Identify natural boundaries**: Look for seams in the work: different layers, domains, files, modules, or independently verifiable outcomes. +2. **Minimize dependencies and enforce DAG ordering**: Each sub-plan should depend on as few other sub-plans as possible. Where dependencies exist, make them explicit and one-directional. The dependency graph must form a valid DAG. 
No sub-plan may depend on information produced by a later sub-plan or a sub-plan in the same parallel group. Sub-plans cannot communicate with each other at runtime; the lead agent relays results strictly along dependency edges. If decomposition requires bidirectional information flow, merge or restructure the boundaries. +3. **Embed RFC context and cross-boundary contracts**: Each sub-plan must be self-contained. What belongs in a sub-plan: + - RFC decisions, constraints, goals, non-goals, and risks that govern this unit. + - Domain knowledge the agent cannot derive from code: business rules, config formats, protocol details, or accepted tradeoffs. + - Cross-boundary contracts: exact interface/type signatures, files, data shapes, or commands that other sub-plans depend on. When sub-plans run in parallel, the consuming agent cannot discover these at execution time, so the plan must specify them. For sequential dependencies, the later agent can read the earlier sub-plan's actual output; no pre-specified contract is needed unless the earlier output constrains the later work. + + Cross-boundary contracts must satisfy three additional integrity rules: + + - **Caller annotations**: Every new public method/function introduced by a sub-plan must specify its production caller. If the caller lives in a different sub-plan, both sides must reference the contract: the producer documents "called by: sub-plan N, in `Location`", the consumer documents "calls: `Method` from sub-plan M". No orphan methods. + - **Connected data flow**: When data must flow between components owned by different sub-plans, the master plan must trace the full path: source -> transport mechanism -> destination, with sub-plan ownership at each hop. Prose descriptions like "X stores the value on config" are insufficient when the consumer needs it delivered through a channel no sub-plan was told to wire. 
+ - **Interface boundary checks**: If a sub-plan adds a method to a concrete type, but consumers access that type through an interface, the plan must either add the method to the interface or explicitly assign the concrete-type wiring to a specific sub-plan. + + What does NOT belong: method bodies, private helper design, step-by-step coding instructions, exact commands for testing/linting/building, or design decisions already owned by skills. +4. **Preserve RFC non-goals**: Do not add work the RFC explicitly excluded. If implementation pressure suggests a non-goal should change, stop and ask whether to revise the RFC or approve an explicit deviation. +5. **Translate RFC risks into mechanics**: RFC risks become plan constraints, acceptance criteria, sequencing notes, rollback notes, or handoff constraints where relevant. +6. **Keep sub-plans small**: A good sub-plan should be completable in a single focused session. If it feels too big, split it further. +7. **Skills are the agent's authority, not the plan's**: List the skills each sub-plan requires, but do not replicate skill content into the plan. Skills define how to write code, how to test, how to lint, and how to build. The plan defines what to build and why. If a project has skills for testing, linting, or building, list them in Required Skills and write acceptance criteria at the behavioral/verification level rather than raw command level unless the RFC or project lacks an operational skill. +8. **Sub-plans must be decisive**: Before writing a design decision, verify it against the RFC and codebase. Do not write "if X exists" or "either A or B". Pick the RFC-approved approach or stop for clarification. +9. **Plan documentation updates as a sub-plan**: If the feature affects documented domain concepts, architecture, or business processes, add a final documentation sub-plan. See [Documentation Sub-Plan](#documentation-sub-plan). 
+ +Present the decomposition to the user for review before writing the actual plan +files. + +### Phase 4: Plan Creation, Model Selection, And Execution Binding -1. **Identify execution seams**: Split work by files, layers, domains, and independently verifiable outcomes. -2. **Create a DAG**: The dependency graph is a planning artifact. It must be acyclic, explicit, and based on execution prerequisites created by the sub-plans. -3. **Embed RFC context**: Each sub-plan must include the RFC decisions, constraints, and contract details it needs. Executing agents should not have to read the RFC. -4. **Preserve non-goals**: Do not add work the RFC explicitly excluded. -5. **Translate risks into mechanics**: RFC risks become plan constraints, acceptance criteria, sequencing notes, or handoff constraints where relevant. +Only after Phases 1-3 are complete: -Present the decomposition to the user for review before writing plan files. +1. **Create the plan directory and files**: -### Phase 4: Plan Creation And Execution Binding + **Plan location depends on context:** + - Standalone feature: `plans/features//` + - Feature belonging to an epic: `plans/epics///` -Create the plan directory and files using the templates in this skill's `assets/` directory: + If the user references an epic plan or provides an epic context, use the epic path. Otherwise, default to standalone. ```text / @@ -78,49 +146,188 @@ Create the plan directory and files using the templates in this skill's `assets/ └── ... ``` -The master plan must include the RFC path and review summary. Each sub-plan must include enough RFC-derived context to be self-contained. +2. **Assign execution models**: For each sub-plan, assess complexity and recommend an execution model. This enables cost optimization by using cheaper models for straightforward work while reserving the most capable model for planning, review, and difficult execution. 
+ +**Model Selection Decision Tree** — evaluate top-down, use the first tier that fits: + +1. **Most capable model** — use when the sub-plan involves: + - Ambiguous or underspecified requirements that need interpretation + - Multi-step reasoning across multiple systems or domains + - Novel architectural approaches with no existing pattern to follow + - Security-sensitive operations where mistakes are costly + - Documentation updates (see [Documentation Sub-Plan](#documentation-sub-plan)) + +2. **Mid-tier model** — use when the sub-plan involves: + - Complex business logic with multiple edge cases + - Integration of multiple systems or packages + - Performance-critical code requiring careful trade-offs + - State machines or error handling with recovery logic + +3. **Cheapest model** — use when the sub-plan involves: + - Following established patterns already present in the codebase + - CRUD operations, straightforward integrations, configuration changes + - File moves/renames, simple data transformations + - Test writing for existing code with clear acceptance criteria + +When in doubt, prefer one tier up. Document the recommendation in each +sub-plan's `## Execution Model` field with a brief rationale. + +3. **Establish execution bindings for execution**: Each sub-plan's model + skills combination needs a matching runtime-specific execution binding. The active runtime adapter defines what that binding looks like. Natural language alone is not a reliable way to control model selection or preload the right skills. + + **Search for existing execution bindings**: Check the locations and mechanisms defined by the active runtime adapter. A binding is a match if it covers the sub-plan's required model tier and skill set. A partial match can serve as a basis for an updated binding. + + **Establish missing execution bindings**: If no matching binding exists, establish one using the mechanism defined by the active runtime adapter. 
If the runtime uses persistent bindings, create the required artifact. If the runtime uses ephemeral bindings, record the binding parameters in the runtime-specific way so retries and resumed execution use the same model and skills. + + - **Naming and placement**: Follow the active runtime adapter's conventions so the binding is discoverable. + - **Model control**: Set the target model using the runtime's actual model-selection mechanism. + - **Skill preload**: Make the required skills explicit using the runtime's actual skill-loading mechanism. + - **Identity/prompt**: Keep the binding minimal. The sub-plan provides task context; the binding provides model, skills, and runtime-native agent identity. + + **Create a test author binding**: If any sub-plan has testable acceptance criteria, create or reuse one shared project-local test-author binding at the most capable model tier. Preload the project's testing and code-writing skills, plus `test-driven-development` if available. All sub-plans with testable AC share this single binding. + + **Warn the user**: If the runtime adapter says newly established persistent bindings require discovery, reload, or session restart before they become available, note that when presenting the plan. -Assign required skills and execution models exactly as `planning-project-features-direct` does. Establish execution bindings using the active runtime adapter. +4. **Write lead-agent instructions into the master plan**: Multi-sub-plan plans must include explicit worker-dispatch instructions, coordination points, file ownership, cross-sub-plan data flow, and post-execution checks. The executor should not have to infer these mechanics from the planning skill. -### Phase 5: Minimal Review Loop +### Phase 5: RFC-Specific Review Loop -Run only these master-plan reviewers: +After plan creation, run an iterative review process before the user sees the plan. 
+RFC-backed planning uses exactly these reviewers by default: - **`plan-rfc-fidelity-reviewer`**: Checks that the plan faithfully decomposes the RFC without contradicting it, omitting required context, or adding unapproved design scope. -- **`plan-executability-reviewer`**: Checks file ownership, acceptance criteria, dependency order, verification scope, and isolated execution mechanics. +- **`plan-executability-reviewer`**: Checks file ownership, acceptance criteria, dependency order, verification scope, worker-dispatch mechanics, model enforcement, and isolated execution mechanics. -Pass the plan directory, RFC path, and review output path to each reviewer. Review output lives in the plan's `reviews/` directory. +Do not run `plan-architect-reviewer`, `plan-risk-reviewer`, or +`plan-clarity-reviewer` in this workflow unless the user explicitly exits +RFC-backed planning and returns to direct planning. Those design reviewers belong +to direct planning; RFC architecture, risk, and clarity were already reviewed by +RFC reviewers. -Incorporate findings and re-run only affected reviewers until both report no blocking findings. If a finding requires changing the RFC design, stop and ask whether to revise the RFC or approve an explicit deviation. +Pass the plan directory, RFC path, and review output path to each reviewer. +Review output lives in the plan's `reviews/` directory, for example: -Do not run `plan-architect-reviewer`, `plan-risk-reviewer`, or `plan-clarity-reviewer` in this workflow. Those reviewers belong to direct planning. RFC architecture, risk, and clarity were already reviewed by RFC reviewers. +```text +/reviews/ +├── 00-master.rfc-fidelity.md +└── 00-master.executability.md +``` + +If a reviewer has edit permission and writes its own review artifact, let it do +so. If it returns findings instead, write the review artifact from the response. +Verify the expected review artifact exists before continuing. 
+ +Incorporate findings and re-run only affected reviewers until both report no +blocking findings. If a finding requires changing the RFC design, stop and ask +whether to revise the RFC or approve an explicit deviation. + +### Phase 6: User Approval And Feedback + +Before presenting, run a final consistency check: + +1. **RFC coverage** — every RFC goal and success criterion is represented in the plan. +2. **Non-goal preservation** — every RFC non-goal remains out of scope. +3. **Constraint propagation** — every RFC constraint that affects execution appears in the master plan or relevant sub-plan. +4. **Model assignments** — every sub-plan's execution model matches the decision tree and any explicit project rules. +5. **Execution bindings** — every multi-sub-plan worker assignment has a runtime-specific binding and lead-agent dispatch instructions. +6. **Skill conformance** — reviewer-driven changes do not introduce patterns that contradict required skills listed in each sub-plan. +7. **Cross-sub-plan prerequisites** — every interface, type, file, or artifact referenced in a sub-plan's Prerequisites section is created by an earlier sub-plan in the dependency graph. +8. **Integration contract integrity** — every "Produces" entry in one sub-plan has a matching "Consumes" entry in another when cross-sub-plan consumption exists, and the master plan's data flow table covers every cross-boundary data path. +9. **DAG validity** — execution order forms a valid DAG: no cycles, no sub-plan depends on output from a later or same-group sub-plan, and every consumed prerequisite points to an earlier group. +10. **Anchor boundaries** — if an active feature anchor exists, the plan does not duplicate anchor content and any sub-plan using `anchoring-context` has a feature-level reason to update it. +11. **Review status** — `plan-rfc-fidelity-reviewer` and `plan-executability-reviewer` findings are resolved or explicitly documented as non-blocking. 
+ +Fix inconsistencies before proceeding. + +Present the reviewed plan with the RFC path, review summary, worker-dispatch +summary, and any explicit approved deviations. Only mark ready when the user +approves. + +**Remind the user**: The plan intentionally omits implementation details. The +user reviews architecture and constraints now; they review actual code after +execution. This is by design, not a gap. + +#### Handling User Feedback + +When the user requests changes, incorporate them and classify each change to +determine whether re-review is needed: -### Phase 6: User Approval +| Change Type | Examples | Re-review Action | +|---|---|---| +| Cosmetic / wording | Clarify a step description, rename a sub-plan, fix typos | None | +| Scoped implementation detail | Add an edge case to one sub-plan, change a file path, adjust a step | None — planner judgment is sufficient | +| Scope adjustment within a sub-plan | Add/remove acceptance criteria, change approach for one sub-plan | Re-review affected sub-plan through `plan-rfc-fidelity-reviewer` if RFC scope may be affected, and `plan-executability-reviewer` if mechanics changed | +| Structural change | New sub-plan added, dependency graph changed, boundaries shifted, sub-plans merged/split, ownership or verification scope changed across sub-plans | Re-review affected plan sections with both RFC-fidelity and executability reviewers | +| RFC design change | Goal, non-goal, contract, risk, or accepted design decision changes | Stop and ask whether to revise the RFC or approve an explicit plan deviation before re-review | -Before presenting, verify: +Default behavior: after incorporating feedback, state what changed and recommend +whether re-review is warranted. Do not automatically re-run reviewers unless the +workflow requires it or the user asks. -1. Every RFC goal and success criterion is represented in the plan. -2. Every RFC non-goal remains out of scope. -3. 
Every RFC constraint that affects execution appears in the master plan or relevant sub-plan. -4. Every sub-plan is self-contained. -5. The execution order is a valid DAG. -6. `plan-rfc-fidelity-reviewer` and `plan-executability-reviewer` findings are resolved or explicitly documented as non-blocking. +### Post-Execution: Component Documentation Review -Present the reviewed plan with the RFC path, review summary, and any explicit approved deviations. Only mark ready when the user approves. +If this project has component documentation, run the `component-docs-reviewer` +agent after all sub-plans complete to catch implementation-vs-plan drift in +component docs. ## Plan Structures -- **[Master plan template][master-plan-template]** -- **[Sub-plan template][sub-plan-template]** +Templates for plan files are in this skill's `assets/` directory. Read them when +creating plans: + +- **[Master plan template][master-plan-template]** — Orchestration document structure with RFC mapping and lead-agent execution mechanics. +- **[Sub-plan template][sub-plan-template]** — Self-contained execution unit structure with RFC context. + +## Documentation Sub-Plan + +When a feature affects documented domain concepts, architecture, or business +processes, add a documentation sub-plan as the final sub-plan in the execution +order. This makes doc updates part of the plan: visible, reviewable, and +deliberate. + +### What Goes in the Documentation Sub-Plan + +| Doc Level | Planned Upfront? 
| Rationale | +|---|---|---| +| Domain docs | Yes | The planner knows what domain concepts are changing | +| Architecture docs | Yes | The planner knows what structural changes are happening | +| Process docs | Yes | The planner knows what flows are being added/modified | +| Component docs | No — post-execution | Component docs describe implementation details that may drift from the plan | + +### How to Write It + +The documentation sub-plan follows the standard sub-plan template but its +implementation steps are doc edits, not code changes. Be specific: + +- **Which existing docs to update** — file paths, sections, and stale claims to replace. +- **Which new docs to create** — file paths, existing doc patterns to follow, and what the new doc should cover. +- **Structural pattern matching** — if existing docs follow a pattern, new additions must follow it. +- **Proportionality guard** — the target document's purpose, audience, and existing abstraction level outrank feature-local emphasis. The sub-plan should say whether the feature should be mentioned as a primary concept, a small example, or only as an implementation detail. Do not let a narrow feature become the center of a broad domain, architecture, or process doc unless the RFC explicitly changes that doc's central subject. +- **Required skills** — list the relevant documenting skills. +- **Execution model** — always assign the most capable available model. Documentation requires synthesis and judgment. + +### When to Skip It + +Skip the documentation sub-plan when: + +- The feature does not affect any documented concepts, flows, or architecture. +- The only doc impact is component-level and can be handled by post-execution component docs review. +- No project documentation exists yet; recommend creating initial docs as a separate effort. -## Rules +## Rules (Non-Negotiable) -- Never use this workflow with an unreviewed RFC. -- Never change the RFC design silently. 
-- Never run full direct-planning reviewers in this workflow. -- Never ask the user to restate information that is already in the RFC. -- Always embed execution-critical RFC context into each sub-plan. -- Always run `plan-rfc-fidelity-reviewer` and `plan-executability-reviewer` before presenting the plan. +- **Never use this workflow with an unreviewed RFC.** Return to `planning-project-features` routing instead. +- **Never change the RFC design silently.** Revise the RFC or record an explicit approved deviation. +- **Never ask the user to restate information that is already in the RFC.** Use the RFC as the approved baseline. +- **Always embed execution-critical RFC context into each sub-plan.** Executing agents should not need to read the RFC. +- **Always decompose into sub-plans.** A monolithic plan is a failure mode. +- **Each sub-plan must be self-contained.** Embed context, contracts, constraints, and acceptance criteria directly. +- **Always list required skills in every sub-plan.** An executing agent without the right skills will produce subpar results or get stuck. +- **Always respect model assignments during execution.** Sub-plan model assignments are deliberate cost-optimization decisions. The assigned model must be used through the active runtime's actual model-selection mechanism. +- **Use runtime-specific execution bindings for multi-sub-plan execution.** When a plan has two or more sub-plans, dispatch each sub-plan through its assigned execution binding. If a binding fails, diagnose and retry once. If it still cannot be resolved, stop and ask the user. Never silently execute on the coordinator or a more expensive model. +- **Write lead-agent execution instructions into every multi-sub-plan master plan.** Do not rely on the executor remembering this skill. 
+- **Always run `plan-rfc-fidelity-reviewer` and `plan-executability-reviewer` before presenting the plan.** +- **Never run full direct-planning design reviewers in this workflow unless the user explicitly exits RFC-backed planning.** RFC architecture, risk, and clarity were already reviewed at RFC time. +- **Save plans to the correct location.** Standalone features go to `plans/features//`; epic features go to `plans/epics///`. [master-plan-template]: assets/master-plan-template.md [sub-plan-template]: assets/sub-plan-template.md diff --git a/dot_agents/skills/planning-project-features-from-rfc/assets/master-plan-template.md b/dot_agents/skills/planning-project-features-from-rfc/assets/master-plan-template.md index e7972dd..a7606ad 100644 --- a/dot_agents/skills/planning-project-features-from-rfc/assets/master-plan-template.md +++ b/dot_agents/skills/planning-project-features-from-rfc/assets/master-plan-template.md @@ -1,6 +1,7 @@ # RFC-Backed Master Plan Template -The master plan is the orchestration document. It decomposes an approved RFC into executable sub-plans. It does NOT redefine the RFC design. +The master plan is the orchestration document. It decomposes an approved RFC into executable sub-plans. +It does NOT redefine the RFC design, and it DOES carry the same execution mechanics as direct feature plans. ```markdown # Master Plan: @@ -11,51 +12,102 @@ The master plan is the orchestration document. 
It decomposes an approved RFC int - **RFC Reviews**: `rfc-architect-reviewer` , `rfc-risk-reviewer` , `rfc-clarity-reviewer` ## Summary + ## RFC Scope Mapping + | RFC Item | Plan Coverage | |----------|---------------| | | | ## Explicit Deviations + ## Sub-Plans + | # | Sub-Plan | Depends On | Model | Description | |----|----------|------------|-------|-------------| | 01 | `01-.md` | - | Cheapest | | | 02 | `02-.md` | 01 | Mid-tier | | +| 03 | `03-.md` | - | Most capable | | ## Execution Order - + + + +- **Parallel group 1**: 01, 03 (no dependencies) +- **Sequential**: 02 (after 01) ## Execution via Worker Agents + +**Worker agents are REQUIRED for plans with 2+ sub-plans.** Each sub-plan's model + skills combination maps to a worker agent definition or dispatch recipe created during planning. Worker agents are the only reliable mechanism for controlling sub-agent model selection; model requests via natural language prompts or team configuration are unreliable. + +**The only exception** — skip worker agents when: + +- Single sub-plan (execute directly) +- All sub-plans are trivially small, such as one-line mechanical edits with no explicit model assignment + +**Worker Agents**: + | Sub-Plan | Implementer Worker | Test Author Worker | Model Tier | |----------|--------------------|--------------------|------------| | 01 | `--worker` | `-test-author-worker` | | +| 02 | `--worker` | - (no testable AC) | | +| 03 | `-docs-worker` | - (documentation task) | Most capable | + +**File Ownership**: -## File Ownership | Sub-Plan | Primary Files | |----------|---------------| | 01 | | +| 02 | | ## Cross-Sub-Plan Data Flow + + + | Data | Source (Sub-Plan) | Transport | Destination (Sub-Plan) | |------|-------------------|-----------|------------------------| -| | 01 | | 02 | +| | 01 — `` | | 02 — `` | + + + +**Lead Agent Instructions**: + +- Use this master plan as the roadmap. 
+- Before editing implementation files, initialize or resume `progress.md` and fill the execution audit with planned workers, model tiers, dispatch mechanisms, and TDD gate status. +- Spawn each sub-plan's assigned worker agent from the table above using the active runtime adapter's dispatch mechanism. Do not self-execute assigned worker tasks in the coordinator context. +- Run sub-plans in the same parallel group concurrently where the runtime supports it and file ownership does not conflict. +- For sequential dependencies, wait for the prior worker to complete before spawning the next. +- Relay prerequisite outputs between workers when needed; workers cannot communicate directly. +- Pass the sub-plan file path and any prerequisite context when spawning implementer workers. +- For test-author workers, pass only acceptance criteria and code-surface context through an isolated workspace; do not pass RFC paths, plan paths, feature names, or design rationale. Same-workspace subagent invocation is not enough for structural TDD unless the runtime can prove it routes the worker into the isolated workspace. +- If a worker binding, model assignment, or TDD isolation mechanism cannot be used, diagnose and retry once. If it still cannot be used, stop and ask the user rather than falling back to coordinator execution or a different model tier. +- Synthesize results when all sub-plans finish. + +**Coordination Points**: + + + +- After 01 completes: Pass to worker executing 02. +- If : Relay to affected workers. ## RFC Risk Coverage + | RFC Risk / Tradeoff | Plan Handling | |---------------------|---------------| | | | ## Review Summary + | Reviewer | Status | Notes | |----------|--------|-------| | `plan-rfc-fidelity-reviewer` | Pending | | | `plan-executability-reviewer` | Pending | | ## Post-Execution -If this project has component-level documentation, run the `component-docs-reviewer` agent to verify component docs still match the actual implementation. 
+ +After implementation, run the final verification appropriate for this feature and verify documentation still matches the actual implementation. If this project has component-level documentation, run the `component-docs-reviewer` agent to verify component docs still match the actual implementation. ``` diff --git a/dot_agents/skills/planning-project-features-from-rfc/assets/sub-plan-template.md b/dot_agents/skills/planning-project-features-from-rfc/assets/sub-plan-template.md index 5ee9a00..255f5a4 100644 --- a/dot_agents/skills/planning-project-features-from-rfc/assets/sub-plan-template.md +++ b/dot_agents/skills/planning-project-features-from-rfc/assets/sub-plan-template.md @@ -1,12 +1,15 @@ # RFC-Backed Sub-Plan Template -Each sub-plan is a self-contained execution unit derived from the RFC. An agent should be able to execute it without reading the RFC or other sub-plans. +Each sub-plan is a self-contained execution unit derived from the RFC. An agent +should be able to execute it without reading the RFC, master plan, or other +sub-plans. ```markdown # Sub-Plan: ## Objective - + + ## RFC Context - **RFC**: `` @@ -14,37 +17,66 @@ Each sub-plan is a self-contained execution unit derived from the RFC. 
An agent - **Relevant constraints / non-goals**: ## Required Skills + + + - `skill-name` — reason it's needed ## Execution Model + **Recommended**: Cheapest | Mid-tier | Most capable -**Rationale**: + +**Rationale**: + +Examples: + +- Cheapest: "Straightforward configuration change following existing patterns" +- Mid-tier: "Shell/runtime change where regressions are costly" +- Most capable: "Documentation synthesis across final implementation state" ## Prerequisites -- + + + +- - Or: "None — this sub-plan has no dependencies" ## Context - + + ## Primary Files -- `path/to/file.ext` (create | modify) + + + +- `path/to/file.ext` (create | modify | delete) ## Integration Contracts + + + **Produces**: + - `Type.Method()` — called by: sub-plan NN, in `consumer/location.go` **Consumes**: -- calls: `Type.Method()` from sub-plan NN -**Interface wiring**: +- calls: `Type.Method()` from sub-plan NN - +**Interface wiring**: ## Design Decisions - + + + +Do NOT include method body implementations, exact code patterns, step-by-step coding instructions, or specific commands for testing/linting/building when skills cover those operations. ## Acceptance Criteria + - [ ] - [ ] + + ``` diff --git a/dot_agents/skills/planning-project-features-from-rfc/references/runtime-claude.md b/dot_agents/skills/planning-project-features-from-rfc/references/runtime-claude.md index a0f9226..5bacfd8 100644 --- a/dot_agents/skills/planning-project-features-from-rfc/references/runtime-claude.md +++ b/dot_agents/skills/planning-project-features-from-rfc/references/runtime-claude.md @@ -54,6 +54,20 @@ Reuse first. A worker matches when its `model` and `skills` cover the sub-plan's Newly created Claude worker agents are not discoverable to an already-running session. After creating or modifying a worker, tell the user the current session must be restarted before dispatch. 
+## Execution Dispatch + +RFC-backed plans with two or more sub-plans must include concrete lead-agent instructions and worker tables in the master plan. During execution, launch the assigned Claude worker agents rather than recreating their persona in prompt text. Do not rely on prompt wording to pick the right model, and do not let the coordinator execute a sub-plan directly when the plan assigned a worker or model tier. + +## TDD Isolation Mechanics + +If any sub-plan has testable acceptance criteria, the shared test-author worker must be paired with an isolation mechanism. Prefer Worktrunk when available, then Claude's native worktree mechanism, then `git worktree`. If this cannot be verified, the plan must say that structural TDD is blocked or explicitly skipped with a concrete reason; generic "runtime cannot isolate" language is not sufficient when a worktree plus worker dispatch path is available. + +## Model Assignment + +- Use the worker agent's explicit `model` field as the source of truth. +- Treat the plan's model tier as binding. Do not silently upgrade or downgrade. +- If the requested model cannot be used in the current Claude environment, stop and ask the user how to proceed. + ## Review Artifact Ownership The planner owns the `reviews/` directory and must verify each expected review artifact exists after reviewer completion. diff --git a/dot_agents/skills/planning-project-features-from-rfc/references/runtime-codex.md b/dot_agents/skills/planning-project-features-from-rfc/references/runtime-codex.md index a75751a..be86992 100644 --- a/dot_agents/skills/planning-project-features-from-rfc/references/runtime-codex.md +++ b/dot_agents/skills/planning-project-features-from-rfc/references/runtime-codex.md @@ -47,6 +47,20 @@ For each sub-plan, create or select a binding that specifies: If the binding is ephemeral, record its parameters in plan metadata or execution context so retries and resumed execution use the same model and skills. 
+## Execution Dispatch + +RFC-backed plans with two or more sub-plans must include concrete lead-agent instructions and worker/dispatch-recipe tables in the master plan. During execution, use Codex's actual worker dispatch mechanism with the recipe's explicit model and skills. Do not rely on prompt text alone to pick the right model, and do not let the coordinator execute a sub-plan directly when the plan assigned a worker or model tier. + +## TDD Isolation Mechanics + +If any sub-plan has testable acceptance criteria, the test-author dispatch recipe must be paired with an isolation mechanism. Prefer `wt` when available in the project workflow, then dispatch the test author into the isolated workspace. If this cannot be verified, the plan must say that structural TDD is blocked or explicitly skipped with a concrete reason; generic "runtime cannot isolate" language is not sufficient when a worktree plus worker dispatch path is available. + +## Model Assignment + +- Use Codex's explicit model-selection mechanism in the worker dispatch path. +- Treat the plan's model tier as binding. Do not silently upgrade or downgrade. +- If the requested model cannot be used in the current Codex environment, stop and ask the user how to proceed. + ## Review Artifact Ownership The planner owns the `reviews/` directory and must verify each expected review artifact exists after reviewer completion. 
diff --git a/dot_agents/skills/planning-project-features-from-rfc/references/runtime-opencode.md b/dot_agents/skills/planning-project-features-from-rfc/references/runtime-opencode.md index 88997a9..c466be3 100644 --- a/dot_agents/skills/planning-project-features-from-rfc/references/runtime-opencode.md +++ b/dot_agents/skills/planning-project-features-from-rfc/references/runtime-opencode.md @@ -8,16 +8,20 @@ This adapter maps `planning-project-features-from-rfc` to OpenCode-native mechan **Skill directories**: -- Project-local OpenCode skills: `.opencode/skills/` +- Project-local OpenCode skills: `.opencode/skills/` within the repository - Global OpenCode skills: `~/.config/opencode/skills/` - OpenCode also discovers compatible skills under `.claude/skills/`, `~/.claude/skills/`, `.agents/skills/`, and `~/.agents/skills/` **Agent directories**: -- Project-local OpenCode agents: `.opencode/agents/` +- Project-local OpenCode agents: `.opencode/agents/` within the repository - Global OpenCode agents: `~/.config/opencode/agents/` -If working from a dotfile source repository, use source-path equivalents when creating global artifacts. +If working from a dotfile source repository, use source-path equivalents when creating global artifacts. For example, use `private_dot_config/opencode/agents/` rather than editing `~/.config/opencode/agents/` directly. + +When the same agent name exists in both locations, project-local wins. When searching for reusable reviewer or execution bindings, check project-local first, then global. + +Also consult `AGENTS.md` for documented skill mappings, reviewer expectations, and project-specific conventions. ## Required Reviewers @@ -30,7 +34,7 @@ Do not launch `plan-architect-reviewer`, `plan-risk-reviewer`, `plan-clarity-rev ## Reviewer Bindings -OpenCode reviewer bindings are markdown-defined custom subagents. Discover project-local first, then global. 
Invoke reviewers by subagent name through OpenCode's normal subagent mechanism; do not recreate reviewer personas in prompt text. +OpenCode reviewer bindings are markdown-defined custom subagents. Discover project-local first, then global. Invoke reviewers by subagent name through OpenCode's normal subagent mechanism, including `@` mentions in interactive sessions when appropriate. Do not recreate reviewer personas in prompt text. Pass only: @@ -53,9 +57,33 @@ Reuse first. A binding matches when it covers the sub-plan's model tier, permiss - Set an explicit `model`; omitted models inherit from the caller and are not acceptable for plan-assigned model tiers. - Grant minimum permissions. - Allow required skills in `permission.skill` and instruct the subagent prompt to load them immediately. +- Deny or tightly scope nested `permission.task` unless the worker genuinely needs to spawn child agents. After establishing a new persistent binding, verify that OpenCode can invoke it. If not, tell the user a reload or session restart is required. +## Execution Dispatch + +RFC-backed plans with two or more sub-plans must include concrete lead-agent instructions and worker tables in the master plan. During execution, OpenCode workers are invoked through the runtime's native subagent mechanism. In interactive sessions, `@` mention is a valid native invocation path when it can invoke the named worker and preserve the worker's configured model and permissions. + +The CLI is an acceptable fallback when the current runtime surface cannot invoke project-local custom subagents directly, or when explicit workspace routing is required: + +```bash +opencode run --agent --dir "" +``` + +Do not rely on prompt text alone to pick the right model, and do not let the coordinator execute a sub-plan directly when the plan assigned a worker or model tier. 
+ +## TDD Isolation Mechanics + +If any sub-plan has testable acceptance criteria, the test-author binding must be paired with an isolation mechanism. Prefer `wt` when available in the project workflow, then dispatch the test author into the isolated workspace. Same-workspace `@` invocation is not sufficient for structural TDD unless the runtime can prove it routes that subagent into the isolated worktree. Acceptable routing includes a verified native isolated-workspace dispatch mechanism or `opencode run --agent --dir `. If this cannot be verified, the plan must say that structural TDD is blocked or explicitly skipped with a concrete reason; generic "runtime cannot isolate" language is not sufficient when `wt` plus either native isolated-workspace dispatch or `opencode run --dir` is available. + +## Model Assignment + +- Use the custom subagent's explicit `model` field as the source of truth. +- Treat the plan's model tier as binding. Do not silently upgrade or downgrade. +- Because omitted `model` values inherit from the caller in OpenCode, do not treat inherited model selection as sufficient for a binding that is supposed to encode a plan decision. +- If the requested model cannot be used in the current OpenCode environment, stop and ask the user how to proceed. + ## Review Artifact Ownership The planner owns the `reviews/` directory and must verify each expected review artifact exists after reviewer completion. From 946986bc23b25e40a661be352408a907fb901ecf Mon Sep 17 00:00:00 2001 From: Timor Gruber Date: Sat, 9 May 2026 16:51:54 +0300 Subject: [PATCH 06/13] improve executing-plans skill to capture intent mismatches For easier diagnosis of what happened, such as not spawning worker agents, or spawning them with the wrong model, the skill now performs a "preflight" step that fills the progress file with the real-time data. It also is better instructed now to pay attention to worker agents, following the plan's intention rather than deciding on its own. 
Finally, just as with the planning skill, the runtime bindings were slightly corrected to perform better, especially for OpenCode subagent spawning. --- dot_agents/skills/executing-plans/SKILL.md | 39 ++++++++++++------- .../assets/progress-template.md | 6 +++ .../references/runtime-claude.md | 4 +- .../references/runtime-codex.md | 5 ++- .../references/runtime-opencode.md | 21 +++++++++- 5 files changed, 58 insertions(+), 17 deletions(-) diff --git a/dot_agents/skills/executing-plans/SKILL.md b/dot_agents/skills/executing-plans/SKILL.md index 25dd141..f0c2ca5 100644 --- a/dot_agents/skills/executing-plans/SKILL.md +++ b/dot_agents/skills/executing-plans/SKILL.md @@ -31,6 +31,7 @@ Do not load or mix instructions from the other runtime adapter in the same turn. 2. **Progress Is Always Persisted**: After every meaningful step, update the progress file. If the session drops, the executor resumes from the last checkpoint — not from scratch. 3. **Tests Are Immutable to the Implementer**: The implementer cannot modify tests. If it believes a test is wrong, it reports this and moves on. Disputes are batched for human resolution. 4. **Independent Work Continues**: When a task is blocked (test dispute, failure), the executor continues with independent tasks. +5. **Execution Bindings Are Correctness, Not Optimization**: When a plan assigns worker bindings or model tiers, executing through those bindings is mandatory. The coordinator does not silently self-execute implementation work just because it can edit files. ## Workflow @@ -61,11 +62,22 @@ Check for an existing progress file alongside the plan (`progress.md` in the pla The progress file is the source of truth for task-by-task execution status, tests, blockers, disputes, and completion. If an anchor is active, update it only for feature-level context: new durable decisions, changed constraints, spec/plan deviations, unresolved questions, or handoff state.
Do not duplicate progress tables or per-task status in the anchor. -### Step 3: Execute Tasks +### Step 3: Execution Preflight + +Before editing implementation files, verify the execution mechanics and record them in progress. + +1. **Worker binding audit**: For every sub-plan, identify the planned implementer worker, test author worker, model tier, and runtime dispatch mechanism. If the plan has two or more sub-plans, or if any sub-plan has an explicit model tier, the coordinator must dispatch through the assigned execution binding. Coordinator self-execution is allowed only for progress files, coordination artifacts, or a single trivial sub-plan with no explicit binding requirement. +2. **Binding availability check**: Use the active runtime adapter to verify every assigned worker is discoverable and invokable before executing any sub-plan. If a binding is missing or cannot be invoked, diagnose and retry once when the cause is mechanical (for example, stale discovery or permission shape). If it still cannot be invoked, stop and ask the user. Do not fall back to the coordinator or a more expensive model. +3. **TDD isolation check**: For each task with testable acceptance criteria, verify whether the active runtime can route the test author into an isolated workspace. If the runtime adapter provides an isolation path, attempt or otherwise concretely verify that path before skipping structural TDD. If verification fails, record the attempted mechanism and stop to ask whether to fix isolation or explicitly skip structural TDD. Do not record a generic "runtime cannot provide isolated workspace" reason while an untried runtime-specific isolation path exists. +4. **Progress audit update**: Add or update the progress file's execution audit with planned worker, actual worker, model/effort, dispatch evidence, and TDD gate status for each task. + +Only proceed once the audit is complete or the user explicitly authorizes a deviation. 
+ +### Step 4: Execute Tasks For each task in execution order (respecting dependencies): -#### 3a. Test Authoring +#### 4a. Test Authoring **When to skip (no testable work)**: Some tasks don't have testable work — documentation updates, file moves, configuration changes, or tasks with no acceptance criteria. Skip test authoring for these and proceed directly to implementation. @@ -82,11 +94,11 @@ For each task in execution order (respecting dependencies): If the code surface isn't ready for TDD, skip test authoring and proceed directly to implementation without structural separation. Record the decision briefly in the progress file (Tests column: `skipped`, Notes: e.g., "declared untestable in AGENTS.md" or "no testable seams"). The implementer still receives the full task with acceptance criteria — it just doesn't receive pre-written tests and isn't bound by the test immutability constraint. -**Isolation capability gate**: Before structural TDD, use the active runtime adapter to verify that the runtime can place the test author in an isolated workspace that does not contain the planning artifacts. If the runtime cannot enforce that isolation, skip structural TDD and proceed directly to implementation. Record the decision briefly in the progress file (Tests column: `skipped`, Notes: e.g., "runtime cannot provide isolated test-author workspace"). +**Isolation capability gate**: Before structural TDD, use the active runtime adapter to verify that the runtime can place the test author in an isolated workspace that does not contain the planning artifacts. If the runtime has no enforceable isolation path, skip structural TDD and proceed directly to implementation. If the adapter names an isolation path but verification fails, stop and ask whether to fix isolation or explicitly skip structural TDD. Record the decision briefly in the progress file (Tests column: `skipped`, Notes: e.g., "no runtime isolation mechanism" or "user approved TDD skip after failed `<mechanism>` dispatch").
**Compilation readiness gate**: Before spawning the test author, verify that the target package compiles at the current execution state and that the required test infrastructure already exists. Earlier sub-plans may have changed interfaces or compile-time assertions in ways that leave the package temporarily uncompilable even though the task is still meant to follow structural TDD. Check for missing or stale mocks, fixtures, and test helpers as well. If the project uses generated mocks, confirm that the mock for the dependency under test exists and is up to date. -If either check fails, resolve the blocker within the TDD framework instead of skipping TDD or inverting the order. Acceptable scaffolding includes adding method stubs that panic or return zero values to satisfy interface assertions, adding the package to the mock generator configuration and regenerating mocks, or creating missing test helpers. These are temporary unblockers for the test author, not the task implementation itself. Record every scaffold created in the progress file so the implementer knows what must be replaced in step 3b. +If either check fails, resolve the blocker within the TDD framework instead of skipping TDD or inverting the order. Acceptable scaffolding includes adding method stubs that panic or return zero values to satisfy interface assertions, adding the package to the mock generator configuration and regenerating mocks, or creating missing test helpers. These are temporary unblockers for the test author, not the task implementation itself. Record every scaffold created in the progress file so the implementer knows what must be replaced in step 4b. **Isolation via isolated workspace**: The test author must NOT have access to plan files. Use the active runtime adapter's isolation mechanism to create a temporary isolated workspace containing the relevant code surface but not the planning artifacts. 
@@ -115,15 +127,15 @@ The test author writes tests grounded in the acceptance criteria and confirms th **Update progress**: Record test authoring as complete, note test file paths. -#### 3b. Implementation +#### 4b. Implementation -**Without structural TDD** (test authoring was skipped due to testability): Spawn an implementer sub-agent with the complete task. No pre-written tests exist, so the immutability constraint doesn't apply. The implementer implements against the acceptance criteria directly. It returns implementation status and files created/modified. Existing tests must still pass — regressions are still caught in step 3c. +**Without structural TDD** (test authoring was skipped due to testability): Spawn an implementer sub-agent with the complete task. No pre-written tests exist, so the immutability constraint doesn't apply. The implementer implements against the acceptance criteria directly. It returns implementation status and files created/modified. Existing tests must still pass — regressions are still caught in step 4c. -**With structural TDD** (tests were written in 3a): Spawn an **implementer** sub-agent with full context: +**With structural TDD** (tests were written in 4a): Spawn an **implementer** sub-agent with full context: **What the implementer receives:** - The complete task (all sections — design decisions, context, contracts, acceptance criteria) -- The tests written in step 3a (file paths) +- The tests written in step 4a (file paths) - The project's required skills (if the plan specifies them for this task, or via the implementer's execution binding) - Any outputs from prerequisite tasks (relayed by the executor) @@ -141,7 +153,7 @@ The implementer implements the task and runs tests. It returns: **Update progress**: Record implementation status, note created files, record any disputes. -#### 3c. Handle Results +#### 4c. Handle Results **With structural TDD:** @@ -155,7 +167,7 @@ The implementer implements the task and runs tests. 
It returns: - **Implementation complete, no regressions**: Mark task as `done`. Proceed to the next task. - **Existing tests regress**: Mark as `blocked: regression`. Same treatment as above — the implementation broke something outside its scope. -### Step 4: Resolve Blocks +### Step 5: Resolve Blocks When the executor reaches a natural breakpoint — all independent work is done, or all remaining tasks depend on a blocked one — present all blocked items to the user at once: @@ -170,7 +182,7 @@ When the executor reaches a natural breakpoint — all independent work is done, The user resolves each item (fix the test, fix the AC, adjust the task, or accept as-is). The executor then re-runs the affected tasks from the appropriate step (test authoring if AC changed, implementation if tests changed). -### Step 5: Completion +### Step 6: Completion When all tasks are `done`: @@ -198,13 +210,14 @@ If the plan specifies runtime-specific execution bindings, the executor uses the For tasks without testable AC (docs, config, file moves), only the implementer binding is needed. -If no execution bindings are specified, the executor spawns sub-agents with the runtime's default model and tool configuration. The structural isolation requirements and prompt-hygiene rules still apply regardless, but agents may not have skills preloaded. +If no execution bindings are specified, the executor may use the runtime's default sub-agent setup only for a single-sub-plan plan with no explicit model tier. For multi-sub-plan plans or plans with explicit model assignments, missing bindings are a blocker that must be resolved before implementation starts. ## Rules - **Never let the test author see design decisions** — the structural separation is the entire point. If the test author's prompt accidentally includes plan context, the separation is broken. 
-- **Never claim structural TDD without enforced isolation** — if the active runtime cannot place the test author in the required isolated workspace, skip structural TDD and record the reason. Prompt hygiene alone does not satisfy the physical isolation requirement. +- **Never claim structural TDD without enforced isolation** — if the active runtime cannot place the test author in the required isolated workspace, skip structural TDD only after the runtime-specific isolation path is verified unavailable or the user explicitly approves the skip. Prompt hygiene alone does not satisfy the physical isolation requirement. - **Never let the implementer modify tests** — disputes are recorded and batched, not resolved by the implementer. +- **Never self-execute assigned worker tasks** — if a task has an assigned worker or model tier, dispatch it through the runtime binding. If dispatch fails, diagnose and retry once, then stop and ask the user rather than doing the task in the coordinator context. - **Always update progress after each step** — this is the checkpoint mechanism. If you skip an update and the session drops, work is lost. - **Do not use anchors as progress files** — anchors preserve feature-level rationale and handoff context. Progress files preserve mechanical execution state. - **Continue independent work when blocked** — don't stop the entire execution because one task has a dispute. If the plan specifies dependencies, use them to determine what can proceed. If no dependencies are specified, treat remaining tasks as sequential and pause at the blocked one. diff --git a/dot_agents/skills/executing-plans/assets/progress-template.md b/dot_agents/skills/executing-plans/assets/progress-template.md index 9b7c0fd..bcad263 100644 --- a/dot_agents/skills/executing-plans/assets/progress-template.md +++ b/dot_agents/skills/executing-plans/assets/progress-template.md @@ -24,6 +24,12 @@ The progress file is the checkpoint mechanism. 
It must be updated after every me ## Current State +## Execution Audit + +| Task | Planned Worker | Actual Worker | Model / Effort | Dispatch Evidence | TDD Gate | +|------|----------------|---------------|----------------|-------------------|----------| +| | | | | | | + ## Test Artifacts diff --git a/dot_agents/skills/executing-plans/references/runtime-claude.md b/dot_agents/skills/executing-plans/references/runtime-claude.md index 3d4759a..8b090ea 100644 --- a/dot_agents/skills/executing-plans/references/runtime-claude.md +++ b/dot_agents/skills/executing-plans/references/runtime-claude.md @@ -10,6 +10,7 @@ This adapter maps the canonical execution workflow in `../SKILL.md` to Claude-na - Respect the model tier assigned by the plan — use the worker agent's `model` field, not prompt wording, to control model selection. - For testability gate exploration, prefer `subagent_type: "Explore"` at a cheap model tier. - Keep prompts narrow for test authors and complete for implementers, matching the canonical workflow. +- Before executing a multi-sub-plan plan, verify every assigned worker agent is discoverable to the current session. If a worker is missing or cannot be launched, diagnose and retry once when the cause is mechanical. If it still fails, stop and ask the user; do not perform assigned implementation in the coordinator context. ## Execution Bindings @@ -31,7 +32,7 @@ Claude execution bindings are **file-defined worker agents** under `.claude/agen **Contextual isolation**: Even with physical isolation, the test author's prompt must not reveal the plan path, task file path, feature name, or design rationale. Pass only acceptance criteria (inline as text, not as a file path) and the code surface the tests interact with. -**Structural TDD gate**: If Claude cannot create the isolated worktree (e.g., the repository has uncommitted changes that block worktree creation and the user declines to resolve them), skip structural TDD and record the reason in progress. 
+**Structural TDD gate**: If Claude cannot create the isolated worktree (e.g., the repository has uncommitted changes that block worktree creation and the user declines to resolve them), skip structural TDD only after recording the attempted mechanism and failure. If an isolation mechanism exists but dispatch into it fails, stop and ask whether to fix isolation or explicitly skip structural TDD. **Bringing test files back**: After the test author finishes, return the test files to the main execution workspace: @@ -49,6 +50,7 @@ Claude execution bindings are **file-defined worker agents** under `.claude/agen - The parent executor owns `progress.md` and updates it after each meaningful step. - Even if a worker writes files directly, the parent remains responsible for checkpointing and verifying that expected artifacts exist. +- Record dispatch evidence in progress: planned worker, actual worker, model/effort, runtime dispatch mechanism, workspace path, and TDD isolation outcome. ## Model Assignment diff --git a/dot_agents/skills/executing-plans/references/runtime-codex.md b/dot_agents/skills/executing-plans/references/runtime-codex.md index 32de86b..1f2dd88 100644 --- a/dot_agents/skills/executing-plans/references/runtime-codex.md +++ b/dot_agents/skills/executing-plans/references/runtime-codex.md @@ -9,6 +9,7 @@ This adapter maps the canonical execution workflow in `../SKILL.md` to Codex-nat - Use Codex sub-agent dispatch for exploration, test authoring, and implementation work when sub-agents materially help. - Set the sub-agent `model` explicitly using Codex's actual dispatch mechanism when the plan assigns a tier. - Prefer narrow prompts with only the context required for the specific role. +- Before executing a multi-sub-plan plan, verify every assigned dispatch recipe or worker can be invoked. If a binding fails, diagnose and retry once. If it still fails, stop and ask the user; do not perform assigned implementation in the coordinator context. 
## Execution Bindings @@ -28,7 +29,8 @@ This adapter maps the canonical execution workflow in `../SKILL.md` to Codex-nat - Prefer `wt` to create an isolated workspace for the test author. - Use `git worktree` directly only as a fallback when `wt` is unavailable or unsuitable. - Structural TDD in Codex is allowed only when the test author can be dispatched into that isolated workspace. -- If the active Codex environment cannot run the test-author worker inside the isolated workspace, skip structural TDD and record a reason such as `runtime cannot provide isolated test-author workspace`. +- If `wt` or `git worktree` plus Codex worker dispatch is available, do not skip structural TDD without first attempting or concretely verifying the isolated dispatch path. +- If the active Codex environment cannot run the test-author worker inside the isolated workspace after an attempted dispatch, record the exact attempted mechanism and failure, then stop and ask whether to fix isolation or explicitly skip structural TDD. - Do not reveal the plan path, task file path, feature name, or design rationale to the test author. - Pass only acceptance criteria and the code surface the tests interact with. - When structural TDD is used, prompt hygiene is mandatory in addition to physical isolation. @@ -43,6 +45,7 @@ This adapter maps the canonical execution workflow in `../SKILL.md` to Codex-nat - The parent executor owns `progress.md` and should update it after each meaningful step. - Even if a sub-agent writes files directly, the parent remains responsible for checkpointing and artifact verification. +- Record dispatch evidence in progress: planned worker, actual worker, model/effort, runtime dispatch mechanism, workspace path, and TDD isolation outcome. 
## Model Assignment diff --git a/dot_agents/skills/executing-plans/references/runtime-opencode.md b/dot_agents/skills/executing-plans/references/runtime-opencode.md index f9ff433..f0a790a 100644 --- a/dot_agents/skills/executing-plans/references/runtime-opencode.md +++ b/dot_agents/skills/executing-plans/references/runtime-opencode.md @@ -6,11 +6,19 @@ This adapter maps the canonical execution workflow in `../SKILL.md` to OpenCode- ## Exploration and Dispatch -- Use OpenCode subagents for exploration, test authoring, and implementation work when subagents materially help. +- Use OpenCode subagents for exploration, test authoring, and implementation work when subagents materially help. OpenCode subagents can be invoked through native subagent dispatch, including `@` mentions in an interactive session, and by primary agents through the Task tool when permitted. - Prefer the built-in `explore` subagent for cheap, read-only codebase exploration such as the testability gate. - Prefer a custom implementer or test-author subagent when the plan assigns a specific model tier or required skills; the built-in `general` subagent is acceptable only when no explicit binding is required. - Keep prompts narrow for test authors and complete for implementers, matching the canonical workflow. +For same-workspace assigned workers, prefer native OpenCode subagent dispatch when it can invoke the named custom subagent and preserve the assigned model/permissions. In interactive sessions, mentioning `@<worker-name>` is a valid native invocation path. If the current runtime surface cannot invoke project-local custom subagents directly, or when explicit workspace routing is required, the CLI dispatch path is an acceptable fallback: + +```bash +opencode run --agent <worker-name> --dir <workspace-path> "<prompt>" +``` + +Before executing a multi-sub-plan plan, verify that every assigned worker name appears in `opencode agent list` or the runtime's native subagent picker. If native dispatch of an assigned worker fails, diagnose and retry once.
If native dispatch is unavailable in the current surface, use `opencode run --agent ...` as a fallback. If no dispatch path works, stop and ask the user; do not perform the assigned implementation in the coordinator context. + ## Execution Bindings OpenCode execution bindings should be **markdown-defined custom subagents** under `.opencode/agents/` (project-local) or `~/.config/opencode/agents/` (global). @@ -32,11 +40,18 @@ OpenCode execution bindings should be **markdown-defined custom subagents** unde When `wt` is used, create or enter the isolated workspace with `wt switch`. Per Worktrunk's documented behavior, `wt switch` is the command that switches to a worktree and creates one if needed; use `wt switch --create ` when the isolation branch does not exist yet. Its `--execute` mode can also be useful when you need to launch the agent directly inside the isolated worktree. +For OpenCode structural TDD, the required verification is that the test-author process actually runs in the isolated workspace. Same-workspace `@` invocation is not sufficient for this gate unless the runtime can prove it routes that subagent into the isolated worktree. Acceptable isolation-routing mechanisms include: + +- `wt switch --create --execute 'opencode run --agent --dir "$PWD" ""'` +- Creating the isolated worktree with `wt`, determining its path, then running `opencode run --agent --dir ""` +- Any native OpenCode subagent dispatch mechanism that explicitly targets the isolated worktree and can be verified before the test author sees context. + OpenCode's public docs describe subagents, custom agent files, and permissions, but they do not currently document a first-class mechanism for dispatching a subagent into an arbitrary alternate workspace. Therefore: - Do not assume structural TDD is possible just because subagents exist. 
- Structural TDD is allowed only when you can verify that the test-author subagent will actually run against the isolated worktree rather than the main workspace. -- If you cannot verify that workspace routing, skip structural TDD and record `runtime cannot provide isolated test-author workspace` in progress. +- If `wt` and either native isolated-workspace subagent routing or `opencode run --agent --dir` are available, do not skip structural TDD without first attempting or otherwise concretely verifying one of those routing paths. +- If routing cannot be verified after an attempted dispatch, record the exact attempted mechanism and failure in progress, then stop and ask whether to fix isolation or explicitly skip structural TDD. **Physical isolation**: The isolated worktree must contain only the tracked code surface the test author needs. Do not copy plan files into it. @@ -52,6 +67,7 @@ In both cases, verify that the test files now exist in the main execution worksp ## Implementer Dispatch - Dispatch a separate implementer subagent with the full task, test file paths, prerequisite outputs, and required skills. +- Use native OpenCode subagent dispatch for assigned implementer workers when available. Use `opencode run --agent --dir ""` when the current runtime surface cannot invoke the custom worker directly or when explicit workspace routing is needed. - Tell the implementer explicitly that tests are immutable. - If the implementer reports a dispute, record it in progress and continue with independent tasks per the canonical workflow. @@ -59,6 +75,7 @@ In both cases, verify that the test files now exist in the main execution worksp - The parent executor owns `progress.md` and updates it after each meaningful step. - Even if a subagent writes files directly, the parent remains responsible for checkpointing and verifying that expected artifacts exist. 
+- Record dispatch evidence in progress: planned worker, actual worker, model/effort from the worker binding, command or runtime mechanism used, workspace path, and TDD isolation outcome. ## Model Assignment From 25dc91b6ad966f45027296859d62f3b2016db58c Mon Sep 17 00:00:00 2001 From: Timor Gruber Date: Sat, 9 May 2026 16:55:36 +0300 Subject: [PATCH 07/13] improve plan-executability reviewer agents The reviewers now verify more strictly that the master plan contains dispatch instructions, which are necessary for plans with two or more sub-plans, as each sub-plan is assigned a worker agent with a carefully selected model tier, in an effort to reduce costs. --- .../agents/plan-executability-reviewer.md | 17 +++++++++++ .../agents/plan-executability-reviewer.md | 29 +++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/dot_claude/agents/plan-executability-reviewer.md b/dot_claude/agents/plan-executability-reviewer.md index ac78b90..eac209c 100644 --- a/dot_claude/agents/plan-executability-reviewer.md +++ b/dot_claude/agents/plan-executability-reviewer.md @@ -30,6 +30,8 @@ From the master plan and sub-plans, extract the facts that govern what an execut - Acceptance criteria and any verification commands or green-build claims - Public types, functions, methods, files, or commands a sub-plan says it removes, renames, or changes - Cross-sub-plan produces/consumes relationships and caller annotations +- Assigned implementer workers, test-author workers, model tiers, and runtime dispatch instructions +- TDD isolation gates and any documented skip reasons Use this matrix to judge whether each sub-plan can succeed without touching files it does not own. @@ -95,6 +97,19 @@ For each sub-plan that uses a test-author-first workflow, or for which the maste If either compilation readiness or test infrastructure availability fails, flag it as a concern, or as a critical finding when it blocks multiple sub-plans or makes the stated execution order impossible.
Recommend a concrete repair: add temporary stubs to restore compilation before test authoring, move mock generator configuration and regeneration into an earlier sub-plan, or adjust execution order so the test infrastructure exists before the test author is dispatched. +### 9. Check Worker Dispatch And Model Enforcement + +For any plan with two or more sub-plans, or any plan that assigns model tiers, verify that the master plan contains executable lead-agent mechanics: + +- A worker table mapping every sub-plan to an implementer worker, model tier, and test-author worker when applicable. +- A statement that worker agents are required for multi-sub-plan execution, unless the plan explicitly justifies a narrow exception. +- Runtime-specific dispatch instructions or a pointer to the execution binding mechanism the executor must use. +- Lead-agent instructions that say to spawn assigned workers, run independent groups concurrently where supported, wait for dependencies, relay prerequisite outputs, and synthesize results. +- A stop condition for failed worker dispatch, model mismatch, or TDD isolation failure. The plan must not permit the coordinator to silently self-execute assigned work or fall back to a more expensive model. +- Progress or audit expectations that let a resumed executor verify which worker/model actually executed each task. + +If any of these are missing from a multi-sub-plan plan, treat it as a critical finding. Without these mechanics, the executor can accidentally run all work in the coordinator context and defeat the plan's model, skill, and isolation assumptions. + ## Output Format Write your findings to the review output file path provided by the calling agent. If no output path is provided, return your findings as your response instead. 
@@ -135,6 +150,8 @@ Be direct and specific - findings should be organized per affected sub-plan when - **Review executability, not risk.** Do not focus on hidden complexity or migration danger unless it creates a direct execution contradiction. - **Review executability, not clarity.** Vague wording matters here only when it forces the executing agent to make an out-of-scope decision or makes acceptance criteria mechanically unattainable. - **Treat the executing agent as literal-minded and ownership-bound.** If success depends on the agent making a judgment call about foreign files, that is a plan problem. +- **Treat worker dispatch as part of executability.** If a plan assigns workers or model tiers but does not make the lead agent use them mechanically, that is an execution contradiction, not a style issue. +- **Treat structural TDD isolation as part of executability.** If a test author is assigned but no isolated workspace path is provided or the skip reason is generic despite an available runtime mechanism, flag it. - **Prefer plan-internal evidence.** Use the plan's ownership tables, dependency sections, caller annotations, and acceptance criteria as the primary evidence. Do not inspect source code just to prove the plan wrong when the contradiction is already visible in the plan. - **Every finding must be actionable.** Recommend a specific repair such as narrowing verification scope, changing ownership, changing execution order, merging work, or explicitly acknowledging expected intermediate-state failures. - **Do not invent new requirements.** Judge whether the plan can be executed as written, not whether you would have planned it differently. 
diff --git a/private_dot_config/opencode/agents/plan-executability-reviewer.md b/private_dot_config/opencode/agents/plan-executability-reviewer.md index a866be2..f721a68 100644 --- a/private_dot_config/opencode/agents/plan-executability-reviewer.md +++ b/private_dot_config/opencode/agents/plan-executability-reviewer.md @@ -36,6 +36,8 @@ From the master plan and sub-plans, extract the facts that govern what an execut - Acceptance criteria and any verification commands or green-build claims - Public types, functions, methods, files, or commands a sub-plan says it removes, renames, or changes - Cross-sub-plan produces/consumes relationships and caller annotations +- Assigned implementer workers, test-author workers, model tiers, and runtime dispatch instructions +- TDD isolation gates and any documented skip reasons Use this matrix to judge whether each sub-plan can succeed without touching files it does not own. @@ -90,6 +92,31 @@ Evaluate the states between sequential and parallel sub-plans: Missing acknowledgment is usually a concern. A direct contradiction that makes a sub-plan's stated acceptance criteria unattainable is a critical finding. +### 8. Check TDD Workflow Feasibility + +For each sub-plan that uses a test-author-first workflow, or for which the master plan assigns a test author binding, evaluate the execution state at the point where the test author would run: + +- Does the target package compile at that point in the sequence? +- Do earlier sub-plans change interfaces, remove exports, or add abstract methods that the package under test has not satisfied yet? +- Do the required mocks, fixtures, helpers, or other test doubles already exist at that point? +- If mocks are generated, does the plan ensure they can be generated before the test author runs, or does it create a circular dependency where mock generation needs a compiling package that has not been repaired yet? 
+- Does the plan name a runtime isolation mechanism for the test author when structural TDD is expected? + +If compilation readiness, test infrastructure availability, or test-author isolation fails, flag it as a concern, or as a critical finding when it blocks multiple sub-plans or makes the stated execution order impossible. Recommend a concrete repair: add temporary stubs to restore compilation before test authoring, move mock generator configuration and regeneration into an earlier sub-plan, document the isolated workspace dispatch path, or adjust execution order so test infrastructure exists before the test author is dispatched. + +### 9. Check Worker Dispatch And Model Enforcement + +For any plan with two or more sub-plans, or any plan that assigns model tiers, verify that the master plan contains executable lead-agent mechanics: + +- A worker table mapping every sub-plan to an implementer worker, model tier, and test-author worker when applicable. +- A statement that worker agents are required for multi-sub-plan execution, unless the plan explicitly justifies a narrow exception. +- Runtime-specific dispatch instructions or a pointer to the execution binding mechanism the executor must use. +- Lead-agent instructions that say to spawn assigned workers, run independent groups concurrently where supported, wait for dependencies, relay prerequisite outputs, and synthesize results. +- A stop condition for failed worker dispatch, model mismatch, or TDD isolation failure. The plan must not permit the coordinator to silently self-execute assigned work or fall back to a more expensive model. +- Progress or audit expectations that let a resumed executor verify which worker/model actually executed each task. + +If any of these are missing from a multi-sub-plan plan, treat it as a critical finding. Without these mechanics, the executor can accidentally run all work in the coordinator context and defeat the plan's model, skill, and isolation assumptions. 
+ ## Output Format Write your findings to the review output file path provided by the calling agent. If no output path is provided, return your findings as your response instead. @@ -130,6 +157,8 @@ Be direct and specific - findings should be organized per affected sub-plan when - **Review executability, not risk.** Do not focus on hidden complexity or migration danger unless it creates a direct execution contradiction. - **Review executability, not clarity.** Vague wording matters here only when it forces the executing agent to make an out-of-scope decision or makes acceptance criteria mechanically unattainable. - **Treat the executing agent as literal-minded and ownership-bound.** If success depends on the agent making a judgment call about foreign files, that is a plan problem. +- **Treat worker dispatch as part of executability.** If a plan assigns workers or model tiers but does not make the lead agent use them mechanically, that is an execution contradiction, not a style issue. +- **Treat structural TDD isolation as part of executability.** If a test author is assigned but no isolated workspace path is provided or the skip reason is generic despite an available runtime mechanism, flag it. - **Prefer plan-internal evidence.** Use the plan's ownership tables, dependency sections, caller annotations, and acceptance criteria as the primary evidence. Do not inspect source code just to prove the plan wrong when the contradiction is already visible in the plan. - **Every finding must be actionable.** Recommend a specific repair such as narrowing verification scope, changing ownership, changing execution order, merging work, or explicitly acknowledging expected intermediate-state failures. - **Do not invent new requirements.** Judge whether the plan can be executed as written, not whether you would have planned it differently. 
From cb4c75b7ce376acdcf71dd75b19972955d3be7e4 Mon Sep 17 00:00:00 2001 From: Timor Gruber Date: Sat, 9 May 2026 19:00:04 +0300 Subject: [PATCH 08/13] fix Claude reviewer launch mechanism in planning skills The Claude runtime adapters told the planner to launch reviewers with subagent_type "general-purpose", on the rationale that this would let them inherit Write/Edit from the agent definition. The opposite is true: dispatching as general-purpose discards the reviewer's specialized system prompt and replaces its declared tool list with general-purpose's broader set. Each project-local reviewer agent is already exposed as its own subagent_type and declares Write/Edit in its frontmatter, so direct dispatch loads both the right persona and the right tools. Update all four planning runtime-claude adapters (features-direct, features-from-rfc, epics-direct, epics-from-rfc) to require named-subagent_type dispatch and explicitly forbid general-purpose. Also rewrite the misleading Explore warning in features-direct so it no longer implies Explore was a candidate launch type. Remove a Claude-specific subagent_type string that had leaked into the runtime-neutral epics-direct SKILL.md and replace it with the same runtime-adapter delegation the other planning skills use. 
--- dot_agents/skills/planning-project-epics-direct/SKILL.md | 2 +- .../planning-project-epics-direct/references/runtime-claude.md | 2 +- .../references/runtime-claude.md | 2 +- .../references/runtime-claude.md | 2 +- .../references/runtime-claude.md | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dot_agents/skills/planning-project-epics-direct/SKILL.md b/dot_agents/skills/planning-project-epics-direct/SKILL.md index 0494ecf..a6b4bd9 100644 --- a/dot_agents/skills/planning-project-epics-direct/SKILL.md +++ b/dot_agents/skills/planning-project-epics-direct/SKILL.md @@ -122,7 +122,7 @@ After plan creation, run a review using the global reviewers before presenting t - **`plan-architect-reviewer`** — Evaluates feature boundaries, dependency graph, whether the decomposition will hold together when features are planned and built separately. - **`plan-risk-reviewer`** — Identifies risks: features that may be harder than they appear, dependency chains that could cause rework, missing considerations. -Pass the epic plan file path so reviewers can read it and cross-reference against the codebase. Launch reviewers with `subagent_type: "general-purpose"` so they inherit Write/Edit tools and can write review output directly. +Pass the epic plan file path so reviewers can read it and cross-reference against the codebase. Launch reviewers through the active runtime adapter's reviewer-launch mechanism so they receive the intended persona, tools, and model assignment. Pass the review output file path (e.g., `plans/epics/reviews/..md`) to each reviewer. Write-capable reviewers write the file directly; read-only reviewers return findings as their response, and the planner writes the file on their behalf. Check whether the file exists after the reviewer finishes. 
diff --git a/dot_agents/skills/planning-project-epics-direct/references/runtime-claude.md b/dot_agents/skills/planning-project-epics-direct/references/runtime-claude.md index b920a20..f71f5e5 100644 --- a/dot_agents/skills/planning-project-epics-direct/references/runtime-claude.md +++ b/dot_agents/skills/planning-project-epics-direct/references/runtime-claude.md @@ -17,7 +17,7 @@ Direct epic planning uses exactly these reviewers before user approval: ## Reviewer Bindings -Launch reviewer agents with `subagent_type: "general-purpose"` so they inherit their file-defined tools, including `Write`/`Edit` for review output. +Launch each reviewer agent through its own `subagent_type` (for example, `subagent_type: "plan-architect-reviewer"`). The named agent's frontmatter declares both its persona and its `tools` list — including `Write`/`Edit` for write-capable reviewers — so direct dispatch loads both the right system prompt and the right tools. Do not launch reviewers as `general-purpose`: that bypasses the reviewer's specialized system prompt and replaces its declared tool list with general-purpose's broader set. Pass only: diff --git a/dot_agents/skills/planning-project-epics-from-rfc/references/runtime-claude.md b/dot_agents/skills/planning-project-epics-from-rfc/references/runtime-claude.md index 749a7e2..909fa23 100644 --- a/dot_agents/skills/planning-project-epics-from-rfc/references/runtime-claude.md +++ b/dot_agents/skills/planning-project-epics-from-rfc/references/runtime-claude.md @@ -19,7 +19,7 @@ Do not launch `plan-risk-reviewer`; RFC-level risk belongs to `rfc-risk-reviewer ## Reviewer Bindings -Launch reviewer agents with `subagent_type: "general-purpose"` so they inherit their file-defined tools, including `Write`/`Edit` for review output. +Launch each reviewer agent through its own `subagent_type` (for example, `subagent_type: "plan-rfc-fidelity-reviewer"`). 
The named agent's frontmatter declares both its persona and its `tools` list — including `Write`/`Edit` for write-capable reviewers — so direct dispatch loads both the right system prompt and the right tools. Do not launch reviewers as `general-purpose`: that bypasses the reviewer's specialized system prompt and replaces its declared tool list with general-purpose's broader set. Pass only: diff --git a/dot_agents/skills/planning-project-features-direct/references/runtime-claude.md b/dot_agents/skills/planning-project-features-direct/references/runtime-claude.md index 1eceae0..99923d0 100644 --- a/dot_agents/skills/planning-project-features-direct/references/runtime-claude.md +++ b/dot_agents/skills/planning-project-features-direct/references/runtime-claude.md @@ -22,7 +22,7 @@ Also consult `AGENTS.md` (or `CLAUDE.md`) for documented skill mappings and doma ## Reviewer Bindings -**Launch mechanism**: Always launch reviewer agents with `subagent_type: "general-purpose"` so they inherit the full tool set declared in their agent definition — including `Write`/`Edit` for writing review output directly. Using a narrower or read-only `subagent_type` (e.g., `Explore`) silently strips write tools, which forces the planner to relay review output manually and defeats write-capable reviewer definitions. +**Launch mechanism**: Launch each reviewer agent through its own `subagent_type` (for example, `subagent_type: "plan-rfc-fidelity-reviewer"`). The named agent's frontmatter declares both its persona and its `tools` list — including `Write`/`Edit` for write-capable reviewers — so direct dispatch loads both the right system prompt and the right tools. Do not launch reviewers as `general-purpose`: that bypasses the reviewer's specialized system prompt and replaces its declared tool list with general-purpose's broader set, defeating the reviewer definition. Read-only agent types like `Explore` are not a substitute either; pick the actual reviewer's `subagent_type`. 
**Dispatch parameters**: Pass the plan directory path and the requested review output path (e.g., `reviews/00-master.architect.md`). diff --git a/dot_agents/skills/planning-project-features-from-rfc/references/runtime-claude.md b/dot_agents/skills/planning-project-features-from-rfc/references/runtime-claude.md index 5bacfd8..7700d76 100644 --- a/dot_agents/skills/planning-project-features-from-rfc/references/runtime-claude.md +++ b/dot_agents/skills/planning-project-features-from-rfc/references/runtime-claude.md @@ -29,7 +29,7 @@ Do not launch `plan-architect-reviewer`, `plan-risk-reviewer`, `plan-clarity-rev ## Reviewer Bindings -Launch reviewer agents with `subagent_type: "general-purpose"` so they inherit their file-defined tools, including `Write`/`Edit` for review output. +Launch each reviewer agent through its own `subagent_type` (for example, `subagent_type: "plan-rfc-fidelity-reviewer"`). The named agent's frontmatter declares both its persona and its `tools` list — including `Write`/`Edit` for write-capable reviewers — so direct dispatch loads both the right system prompt and the right tools. Do not launch reviewers as `general-purpose`: that bypasses the reviewer's specialized system prompt and replaces its declared tool list with general-purpose's broader set. Pass only: From d552e7f508417f106b15ec55e7d40e02fa6c19fc Mon Sep 17 00:00:00 2001 From: Timor Gruber Date: Sat, 9 May 2026 19:17:37 +0300 Subject: [PATCH 09/13] prioritize user-local Python over Homebrew Keep ~/.local/bin ahead of Homebrew after shellenv runs so uv's default Python symlinks remain stable. Document the PATH precedence rule in the shell startup flow. 
--- docs/processes/shell-startup.md | 4 ++-- dot_zshenv.tmpl | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/docs/processes/shell-startup.md b/docs/processes/shell-startup.md index 46430e6..cb19b8c 100644 --- a/docs/processes/shell-startup.md +++ b/docs/processes/shell-startup.md @@ -72,12 +72,12 @@ flowchart TD ### `.zshenv` Phase (All Shells) -1. **Set base PATH** — Add `~/.local/bin`, `~/bin`, `/usr/local/bin` +1. **Set base PATH** — Add `~/.local/bin`, `~/bin`, `/usr/local/bin` with duplicate removal 2. **Determine Homebrew location** — Based on OS and architecture: - Linux: `/home/linuxbrew/.linuxbrew` - macOS arm64: `/opt/homebrew` - macOS x86: `/usr/local` -3. **Handle Homebrew loading** — Platform-dependent behavior (only if `BREW_LOADED` is not already set). See [deferred Homebrew loading][domain-deferred-brew] for the concept. +3. **Handle Homebrew loading** — Platform-dependent behavior (only if `BREW_LOADED` is not already set). See [deferred Homebrew loading][domain-deferred-brew] for the concept. When Homebrew's `shellenv` is evaluated, `~/.local/bin` and `~/bin` are re-prepended so user-local defaults such as uv-managed `python`/`python3` symlinks take precedence over Homebrew binaries. - **macOS (non-devbox)**: Set `DEFER_BREW_LOAD=true` — postpone the expensive `shellenv` eval to `.zshrc` - **Linux**: Call `load_brew_env` immediately — PATH consistency is more important than startup speed - **Devbox**: Add brew directories to PATH directly — no `eval` needed diff --git a/dot_zshenv.tmpl b/dot_zshenv.tmpl index 9ce6f0e..7473bb8 100644 --- a/dot_zshenv.tmpl +++ b/dot_zshenv.tmpl @@ -1,9 +1,15 @@ -export PATH=$HOME/.local/bin:$HOME/bin:/usr/local/bin:$PATH +typeset -U path PATH +path=("$HOME/.local/bin" "$HOME/bin" /usr/local/bin $path) +export PATH export GPG_TTY=$(tty) function load_brew_env() { # Load (home)brew eval "$("$BREW_BINARY" shellenv)" + + # Homebrew prepends its bin directories; keep user-local defaults first. 
+ path=("$HOME/.local/bin" "$HOME/bin" $path) + export PATH } if [[ ! -v BREW_LOADED || "$BREW_LOADED" == "false" ]]; then From 216012729f6404ccc1c0e83b954cdc738902d8f0 Mon Sep 17 00:00:00 2001 From: Timor Gruber Date: Sat, 9 May 2026 19:43:34 +0300 Subject: [PATCH 10/13] fix `rtk git` not allowing signed commits Use normal git instead for committing only. --- dot_claude/CLAUDE.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dot_claude/CLAUDE.md b/dot_claude/CLAUDE.md index 65d0b07..8b32f22 100644 --- a/dot_claude/CLAUDE.md +++ b/dot_claude/CLAUDE.md @@ -25,6 +25,12 @@ - Write the subject as a present-simple verb phrase so it reads naturally after: "If I were to apply this commit, it would ". Always start with a lowercase verb. - Use the body to explain the motivation for the change and, when helpful, a high-level summary of what changed. Avoid low-level implementation detail. +## Signed Git Commits + +- Use `rtk git` for Git commands by default, including read-only inspection and ordinary write operations, to reduce output. +- Do not use `rtk git` for creating or rewriting signed commit objects. Use `/usr/bin/git` directly for `commit`, `commit-tree`, `commit --amend`, `rebase --exec ... commit ...`, and any command whose purpose is to create or recreate signed commits. +- All commits are expected to be signed, so commit creation should use `/usr/bin/git` rather than `rtk git`. + ## Plan Execution - If a sub-agent fails, diagnose the failure and retry with a fix — do NOT silently take over the work yourself. From d8a69dfd63141ba28a39ac7e37c17fd85adcf6f0 Mon Sep 17 00:00:00 2001 From: Timor Gruber Date: Sat, 9 May 2026 19:56:45 +0300 Subject: [PATCH 11/13] check Homebrew packages directly Homebrew list failures should not block installer setup when the only question is whether a single package exists. Use package-specific lookup so CI and users avoid enumerating unrelated Homebrew state before installing chezmoi. 
--- installer/lib/brew/brew.go | 24 +++++++++++++++--------- installer/lib/brew/brew_test.go | 32 +++++++++++++++++++++++++++----- 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/installer/lib/brew/brew.go b/installer/lib/brew/brew.go index f1e64ef..c24b431 100644 --- a/installer/lib/brew/brew.go +++ b/installer/lib/brew/brew.go @@ -105,21 +105,27 @@ func (b *BrewPackageManager) InstallPackage(requestedPackageInfo pkgmanager.Requ func (b *BrewPackageManager) IsPackageInstalled(packageInfo pkgmanager.PackageInfo) (bool, error) { b.logger.Debug("Checking if package %s is installed with Homebrew", packageInfo.Name) - // Check if the package is installed by listing all installed packages and checking for the package name. - packages, err := b.ListInstalledPackages() + result, err := b.commander.RunCommand( + b.brewPath, + []string{"list", "--versions", packageInfo.Name}, + utils.WithCaptureOutput(), + ) if err != nil { + if result != nil && result.ExitCode == 1 && strings.TrimSpace(result.String()) == "" && strings.TrimSpace(result.StderrString()) == "" { + b.logger.Debug("Package %s is not installed with Homebrew", packageInfo.Name) + return false, nil + } + return false, errors.New("failed to list installed packages with Homebrew: " + err.Error()) } - for _, pkg := range packages { - if pkg.Name == packageInfo.Name { - b.logger.Debug("Package %s is installed with Homebrew", packageInfo.Name) - return true, nil - } + if strings.TrimSpace(result.String()) == "" { + b.logger.Debug("Package %s is not installed with Homebrew", packageInfo.Name) + return false, nil } - b.logger.Debug("Package %s is not installed with Homebrew", packageInfo.Name) - return false, nil + b.logger.Debug("Package %s is installed with Homebrew", packageInfo.Name) + return true, nil } // ListInstalledPackages implements pkgmanager.PackageManager. 
diff --git a/installer/lib/brew/brew_test.go b/installer/lib/brew/brew_test.go index 85b99b3..c372bc1 100644 --- a/installer/lib/brew/brew_test.go +++ b/installer/lib/brew/brew_test.go @@ -219,7 +219,7 @@ func Test_InstallPackage_ReturnsError_WhenInstallationFails(t *testing.T) { func Test_IsPackageInstalled_ReturnsTrue_WhenPackageIsInstalled(t *testing.T) { mockCommander := &utils.MoqCommander{ RunCommandFunc: func(name string, args []string, opts ...utils.Option) (*utils.Result, error) { - if name == "/usr/local/bin/brew" && len(args) == 2 && args[0] == "list" && args[1] == "--versions" { + if name == "/usr/local/bin/brew" && len(args) == 3 && args[0] == "list" && args[1] == "--versions" && args[2] == "git" { output := "git 2.39.0\nnode 18.12.1\nvim 9.0.0500" return &utils.Result{ Stdout: []byte(output), @@ -242,11 +242,12 @@ func Test_IsPackageInstalled_ReturnsTrue_WhenPackageIsInstalled(t *testing.T) { func Test_IsPackageInstalled_ReturnsFalse_WhenPackageIsNotInstalled(t *testing.T) { mockCommander := &utils.MoqCommander{ RunCommandFunc: func(name string, args []string, opts ...utils.Option) (*utils.Result, error) { - if name == "/usr/local/bin/brew" && len(args) == 2 && args[0] == "list" && args[1] == "--versions" { - output := "git 2.39.0\nvim 9.0.0500" + if name == "/usr/local/bin/brew" && len(args) == 3 && args[0] == "list" && args[1] == "--versions" && args[2] == "nonexistent" { + output := "" return &utils.Result{ - Stdout: []byte(output), - }, nil + Stdout: []byte(output), + ExitCode: 1, + }, errors.New("exit status 1") } return nil, errors.New("unexpected command") }, @@ -262,6 +263,27 @@ func Test_IsPackageInstalled_ReturnsFalse_WhenPackageIsNotInstalled(t *testing.T require.False(t, isInstalled) } +func Test_IsPackageInstalled_UsesPackageSpecificBrewList(t *testing.T) { + mockCommander := &utils.MoqCommander{ + RunCommandFunc: func(name string, args []string, opts ...utils.Option) (*utils.Result, error) { + require.Equal(t, "/usr/local/bin/brew", 
name) + require.Equal(t, []string{"list", "--versions", "chezmoi"}, args) + return &utils.Result{ + Stdout: []byte("chezmoi 2.65.0"), + }, nil + }, + } + mockProgramQuery := &osmanager.MoqProgramQuery{} + + packageManager := brew.NewBrewPackageManager(logger.DefaultLogger, mockCommander, mockProgramQuery, "/usr/local/bin/brew", utils.DisplayModeProgress) + packageInfo := pkgmanager.NewPackageInfo("chezmoi", "") + + isInstalled, err := packageManager.IsPackageInstalled(packageInfo) + + require.NoError(t, err) + require.True(t, isInstalled) +} + func Test_IsPackageInstalled_ReturnsError_WhenListInstalledPackagesFails(t *testing.T) { mockCommander := &utils.MoqCommander{ RunCommandFunc: func(name string, args []string, opts ...utils.Option) (*utils.Result, error) { From e5e4c6a00a2e269126bda80200df1033871254e4 Mon Sep 17 00:00:00 2001 From: Timor Gruber Date: Sat, 9 May 2026 19:56:51 +0300 Subject: [PATCH 12/13] make Expect GPG test logs ASCII-only Fedora containers can run Expect under a non-UTF-8 locale, where emoji logging causes Tcl to fail before the installer starts. Keep the automation output ASCII-only so locale differences do not break the interactive GPG CI path. --- installer/test-interactive-gpg.exp | 44 +++++++++++++++--------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/installer/test-interactive-gpg.exp b/installer/test-interactive-gpg.exp index 4688ce5..cdc0427 100755 --- a/installer/test-interactive-gpg.exp +++ b/installer/test-interactive-gpg.exp @@ -44,7 +44,7 @@ exp_internal 0 log_user 1 # Start the installer -puts "🔑 Starting interactive GPG test with:" +puts "Starting interactive GPG test with:" puts " Installer: $installer_path" puts " Email: $email" puts " Name: $name" @@ -71,70 +71,70 @@ set tool_select_step 0 expect { # GPG email prompts (various possible formats) -re "(?i).*(email|e-?mail address)" { - puts "📧 Entering email address..." + puts "Entering email address..." 
send "$email\r" exp_continue } # GPG name/full name prompts -re "(?i).*(full name|name|real name)" { - puts "👤 Entering full name..." + puts "Entering full name..." send "$name\r" exp_continue } # GPG passphrase prompts -re "(?i).*(passphrase|password)" { - puts "🔐 Entering passphrase..." + puts "Entering passphrase..." send "$passphrase\r" exp_continue } # GPG "okay" confirmation - exact match for the problematic prompt -re "Change.*Name.*Email.*kay.*uit" { - puts "✅ GPG change/okay prompt (sending O)..." + puts "GPG change/okay prompt (sending O)..." send "O\r" exp_continue } # GPG "okay" confirmation - matches "(O)kay" pattern -re "\\(O\\)kay" { - puts "✅ GPG okay confirmation (sending O)..." + puts "GPG okay confirmation (sending O)..." send "O\r" exp_continue } # Fallback: any prompt containing "(O)" - likely GPG menu -re "\\(O\\)" { - puts "✅ GPG menu with O option (sending O)..." + puts "GPG menu with O option (sending O)..." send "O\r" exp_continue } # GPG key generation prompts -re "(?i).*(key.*size|rsa.*bits)" { - puts "🔧 Using default key size..." + puts "Using default key size..." send "\r" exp_continue } # Key expiration prompts -re "(?i).*(key.*expir|expir.*date)" { - puts "⏰ Setting key expiration..." + puts "Setting key expiration..." send "0\r" exp_continue } # GPG key type selection (default to RSA) -re "(?i).*(kind of key|key.*type)" { - puts "🔑 Selecting default key type..." + puts "Selecting default key type..." send "\r" exp_continue } # GPG comment field (usually optional) -re "(?i).*comment" { - puts "💬 Skipping comment field..." + puts "Skipping comment field..." send "\r" exp_continue } @@ -146,19 +146,19 @@ expect { incr tool_select_step switch $tool_select_step { 1 { - puts "🔧 Tool selection: selecting item 1..." + puts "Tool selection: selecting item 1..." send "1\r" } 2 { - puts "🔧 Tool selection: selecting item 2..." + puts "Tool selection: selecting item 2..." send "2\r" } 3 { - puts "🔧 Tool selection: selecting item 3..." 
+ puts "Tool selection: selecting item 3..." send "3\r" } 4 { - puts "🔧 Tool selection: confirming with 0..." + puts "Tool selection: confirming with 0..." send "0\r" } } @@ -167,7 +167,7 @@ expect { # Error patterns -re "(?i).*(error|fail|abort)" { - puts "❌ Error detected in output" + puts "Error detected in output" set error_output $expect_out(buffer) puts "Error details: $error_output" # Don't exit immediately - some errors might be expected in CI @@ -176,34 +176,34 @@ expect { # Success patterns -re "(?i).*(success|complete|finished|done)" { - puts "✅ Installation appears to have completed successfully" + puts "Installation appears to have completed successfully" exp_continue } # Timeout handling timeout { - puts "⏰ Timeout reached after 300 seconds" + puts "Timeout reached after 300 seconds" exit 1 } # End of output eof { - puts "📋 Process completed" + puts "Process completed" # Get exit status catch wait result set exit_code [lindex $result 3] puts "Exit code: $exit_code" if {$exit_code == 0} { - puts "✅ Installation completed successfully!" + puts "Installation completed successfully!" exit 0 } else { - puts "❌ Installation failed with exit code: $exit_code" + puts "Installation failed with exit code: $exit_code" exit $exit_code } } } # This should never be reached, but just in case -puts "🤔 Unexpected end of script" +puts "Unexpected end of script" exit 0 From 4a4a7397f13325db6a668be4af113e2f927d91a9 Mon Sep 17 00:00:00 2001 From: Timor Gruber Date: Sat, 9 May 2026 20:02:55 +0300 Subject: [PATCH 13/13] use Homebrew installed prefix checks Homebrew documents --prefix --installed as the installed-formula probe. It returns a failing status when the formula is absent, and may still emit diagnostic output. Use that command and treat exit code 1 as not installed so macOS CI can fall through to installing chezmoi instead of aborting during detection. 
--- installer/lib/brew/brew.go | 11 +++-------- installer/lib/brew/brew_test.go | 19 +++++++++---------- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/installer/lib/brew/brew.go b/installer/lib/brew/brew.go index c24b431..af0da77 100644 --- a/installer/lib/brew/brew.go +++ b/installer/lib/brew/brew.go @@ -107,21 +107,16 @@ func (b *BrewPackageManager) IsPackageInstalled(packageInfo pkgmanager.PackageIn result, err := b.commander.RunCommand( b.brewPath, - []string{"list", "--versions", packageInfo.Name}, + []string{"--prefix", "--installed", packageInfo.Name}, utils.WithCaptureOutput(), ) if err != nil { - if result != nil && result.ExitCode == 1 && strings.TrimSpace(result.String()) == "" && strings.TrimSpace(result.StderrString()) == "" { + if result != nil && result.ExitCode == 1 { b.logger.Debug("Package %s is not installed with Homebrew", packageInfo.Name) return false, nil } - return false, errors.New("failed to list installed packages with Homebrew: " + err.Error()) - } - - if strings.TrimSpace(result.String()) == "" { - b.logger.Debug("Package %s is not installed with Homebrew", packageInfo.Name) - return false, nil + return false, errors.New("failed to check package installation with Homebrew: " + err.Error()) } b.logger.Debug("Package %s is installed with Homebrew", packageInfo.Name) diff --git a/installer/lib/brew/brew_test.go b/installer/lib/brew/brew_test.go index c372bc1..57e6593 100644 --- a/installer/lib/brew/brew_test.go +++ b/installer/lib/brew/brew_test.go @@ -219,8 +219,8 @@ func Test_InstallPackage_ReturnsError_WhenInstallationFails(t *testing.T) { func Test_IsPackageInstalled_ReturnsTrue_WhenPackageIsInstalled(t *testing.T) { mockCommander := &utils.MoqCommander{ RunCommandFunc: func(name string, args []string, opts ...utils.Option) (*utils.Result, error) { - if name == "/usr/local/bin/brew" && len(args) == 3 && args[0] == "list" && args[1] == "--versions" && args[2] == "git" { - output := "git 2.39.0\nnode 18.12.1\nvim 
9.0.0500" + if name == "/usr/local/bin/brew" && len(args) == 3 && args[0] == "--prefix" && args[1] == "--installed" && args[2] == "git" { + output := "/opt/homebrew/opt/git" return &utils.Result{ Stdout: []byte(output), }, nil @@ -242,10 +242,9 @@ func Test_IsPackageInstalled_ReturnsTrue_WhenPackageIsInstalled(t *testing.T) { func Test_IsPackageInstalled_ReturnsFalse_WhenPackageIsNotInstalled(t *testing.T) { mockCommander := &utils.MoqCommander{ RunCommandFunc: func(name string, args []string, opts ...utils.Option) (*utils.Result, error) { - if name == "/usr/local/bin/brew" && len(args) == 3 && args[0] == "list" && args[1] == "--versions" && args[2] == "nonexistent" { - output := "" + if name == "/usr/local/bin/brew" && len(args) == 3 && args[0] == "--prefix" && args[1] == "--installed" && args[2] == "nonexistent" { return &utils.Result{ - Stdout: []byte(output), + Stderr: []byte("Error: No such keg: /opt/homebrew/Cellar/nonexistent"), ExitCode: 1, }, errors.New("exit status 1") } @@ -263,13 +262,13 @@ func Test_IsPackageInstalled_ReturnsFalse_WhenPackageIsNotInstalled(t *testing.T require.False(t, isInstalled) } -func Test_IsPackageInstalled_UsesPackageSpecificBrewList(t *testing.T) { +func Test_IsPackageInstalled_UsesPackageSpecificInstalledPrefixCheck(t *testing.T) { mockCommander := &utils.MoqCommander{ RunCommandFunc: func(name string, args []string, opts ...utils.Option) (*utils.Result, error) { require.Equal(t, "/usr/local/bin/brew", name) - require.Equal(t, []string{"list", "--versions", "chezmoi"}, args) + require.Equal(t, []string{"--prefix", "--installed", "chezmoi"}, args) return &utils.Result{ - Stdout: []byte("chezmoi 2.65.0"), + Stdout: []byte("/opt/homebrew/opt/chezmoi"), }, nil }, } @@ -284,7 +283,7 @@ func Test_IsPackageInstalled_UsesPackageSpecificBrewList(t *testing.T) { require.True(t, isInstalled) } -func Test_IsPackageInstalled_ReturnsError_WhenListInstalledPackagesFails(t *testing.T) { +func 
Test_IsPackageInstalled_ReturnsError_WhenInstalledPrefixCheckFails(t *testing.T) { mockCommander := &utils.MoqCommander{ RunCommandFunc: func(name string, args []string, opts ...utils.Option) (*utils.Result, error) { return nil, errors.New("command failed") @@ -298,7 +297,7 @@ func Test_IsPackageInstalled_ReturnsError_WhenListInstalledPackagesFails(t *test isInstalled, err := packageManager.IsPackageInstalled(packageInfo) require.Error(t, err) - require.Contains(t, err.Error(), "list installed packages") + require.Contains(t, err.Error(), "check package installation") require.False(t, isInstalled) }