diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bfeed5a..294a836 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,10 +32,14 @@ jobs: - name: Compile scripts run: | python3 -m py_compile \ + plugins/codex-fable5/skills/codex-fable5/scripts/codex_findings.py \ plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py \ plugins/codex-fable5/skills/codex-fable5/scripts/fable_coverage.py \ plugins/codex-fable5/skills/codex-fable5/scripts/make_litellm_config.py \ tests/test_scripts.py + sh -n plugins/codex-fable5/bin/codex-fable5 + sh -n plugins/codex-fable5/bin/codex-findings + sh -n plugins/codex-fable5/bin/codex-goals - name: Validate coverage matrix run: python3 plugins/codex-fable5/skills/codex-fable5/scripts/fable_coverage.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d508c6..57b9126 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,15 @@ This project uses a lightweight changelog format: ## Unreleased +### Added + +- Added `codex_findings.py` to track evidence-backed review findings and gate final completion. +- Added the `codex-fable5` wrapper command for checkout-based terminal use, with `codex-findings` and `codex-goals` as advanced aliases. + +### Changed + +- Integrated open or blocked findings into the final `codex_goals.py` checkpoint gate. + ## 0.3.1 - 2026-06-15 ### Added diff --git a/README.md b/README.md index 08ca5de..dc2214a 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ It helps Codex work in a more structured way: inspect first, track goals, gather - Adds a Fable-style, tool-first agent loop for coding and research tasks. - Provides a simple goal ledger with evidence checkpoints. +- Provides a findings ledger for review issues that must be resolved before final completion. - Encourages conclusion-first answers, clue-first debugging, and cheapest useful checks first. - Adds an optional final verification gate before claiming success. 
- Tracks `CLAUDE-FABLE-5.md` source-heading coverage with explicit Codex decisions. @@ -75,24 +76,96 @@ In Codex: ```text Use $codex-fable5 to analyze this project. +Use $codex-fable5 to do this with findings tracking. + $codex-fable5로 이 프로젝트를 분석해줘. +$codex-fable5로 이 작업 진행하고, 리뷰 findings도 추적해줘. + $codex-fable5 を使って、このプロジェクトを分析してください。 +$codex-fable5 を使ってこの作業を進め、レビュー findings も追跡してください。 + 请使用 $codex-fable5 分析这个项目。 +请使用 $codex-fable5 完成这项工作,并跟踪审查 findings。 + 請使用 $codex-fable5 分析這個專案。 + +請使用 $codex-fable5 完成這項工作,並追蹤審查 findings。 +``` + +Use the local helper from a checkout: + +```bash +export PATH="$PWD/plugins/codex-fable5/bin:$PATH" +codex-fable5 status ``` -Create a simple multi-goal ledger: +Without changing `PATH`, run them by path: ```bash -python3 plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py create --brief "Migration" \ +plugins/codex-fable5/bin/codex-fable5 status +``` + +For longer work, create a goal ledger: + +```bash +codex-fable5 goals create --brief "Migration" \ --goal "inspect::Find current behavior and tests" \ --goal "change::Implement the migration" \ --goal "verify::Run tests and inspect output" ``` +Track review findings before final completion: + +```bash +codex-fable5 findings add \ + --title "Unresolved review issue" \ + --severity high \ + --source subagent \ + --evidence "Review found a missing final gate." +codex-fable5 findings gate +``` + +Final goal completion also fails while open or blocked findings remain. `codex-goals` and `codex-findings` are still available as advanced aliases. + +--- + +## Findings Gate + +The findings gate is an optional review closeout flow for work where missed issues are costly. + +Use it when: + +- A review or sub-agent finds actionable issues. +- Verification is uncertain or failed once. +- The task touches several files, migrations, security-sensitive code, or release behavior. + +Do not use it for simple edits or routine answers. 
+ +In normal Codex use, ask for it in the prompt: + +```text +Use $codex-fable5 to implement this and track findings before final completion. +``` + +For terminal use, add findings as accepted repair work: + +```bash +codex-fable5 findings add \ + --title "Missing final verification" \ + --evidence "Review found no command output proving the final state." +codex-fable5 findings next +codex-fable5 findings resolve \ + --id F001 \ + --evidence "Added final verification." \ + --verify-evidence "Tests passed and final status was checked." +codex-fable5 findings gate +``` + +`codex-fable5 goals checkpoint --status complete` will fail while open or blocked findings remain. + --- ## Measure Fable 5 Coverage diff --git a/docs/RELEASING.md b/docs/RELEASING.md index 8b0fb9c..57dcf8b 100644 --- a/docs/RELEASING.md +++ b/docs/RELEASING.md @@ -13,10 +13,14 @@ This project uses a lightweight release process because it is a small Codex plug ```bash python3 -m unittest discover -s tests -v python3 -m py_compile \ + plugins/codex-fable5/skills/codex-fable5/scripts/codex_findings.py \ plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py \ plugins/codex-fable5/skills/codex-fable5/scripts/fable_coverage.py \ plugins/codex-fable5/skills/codex-fable5/scripts/make_litellm_config.py \ tests/test_scripts.py +sh -n plugins/codex-fable5/bin/codex-fable5 +sh -n plugins/codex-fable5/bin/codex-findings +sh -n plugins/codex-fable5/bin/codex-goals python3 plugins/codex-fable5/skills/codex-fable5/scripts/fable_coverage.py ``` diff --git a/examples/AGENTS.md b/examples/AGENTS.md index ed8d917..c302f9e 100644 --- a/examples/AGENTS.md +++ b/examples/AGENTS.md @@ -12,6 +12,7 @@ Use this guidance when you want a repo to default to a Fable-style Codex workflo - Implement requested changes when feasible; do not stop at a proposal unless asked. - Verify every meaningful change with tests, lint, typecheck, command output, screenshots, or source inspection. 
- If verification fails, iterate before returning the task. +- For review-sensitive work, track evidence-backed findings explicitly and close them with verification evidence before final completion. ## Investigation And Diagnosis @@ -46,6 +47,7 @@ Use this guidance when you want a repo to default to a Fable-style Codex workflo - Lead with the outcome or recommendation. - Prefer readable prose over compressed fragments. - Use 2-pass review only for high-cost misses: missing requirements, factual/numeric errors, unexplained clues, or length/scope violations. +- Treat accepted review findings as repair work, not suggestions to remember informally. ## Boundaries diff --git a/plugins/codex-fable5/.codex-plugin/plugin.json b/plugins/codex-fable5/.codex-plugin/plugin.json index 3ff5daf..e357cf2 100644 --- a/plugins/codex-fable5/.codex-plugin/plugin.json +++ b/plugins/codex-fable5/.codex-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "codex-fable5", "version": "0.3.1", - "description": "Fable-style Codex workflow with source-section coverage, goal gates, verification grounding, VFF routing, and optional provider bridge guidance.", + "description": "Fable-style Codex workflow with source-section coverage, goal and findings gates, verification grounding, VFF routing, and optional provider bridge guidance.", "author": { "name": "Codex Fable5 Maintainers" }, @@ -11,7 +11,7 @@ "interface": { "displayName": "Codex Fable5", "shortDescription": "Fable-style gates and routing.", - "longDescription": "Codex Fable5 packages a Codex-native skill that adapts Claude Fable 5, fablize, and Value-for-Fable ideas into a tool-first workflow with source-section coverage accounting, evidence checkpoints, verification grounding, clue-first diagnosis, cost-aware routing, optional 2-pass review, and LiteLLM provider bridge guidance for users who already have authorized Anthropic model access.", + "longDescription": "Codex Fable5 packages a Codex-native skill that adapts Claude Fable 5, fablize, and 
Value-for-Fable ideas into a tool-first workflow with source-section coverage accounting, evidence checkpoints, review findings gates, verification grounding, clue-first diagnosis, cost-aware routing, optional 2-pass review, and LiteLLM provider bridge guidance for users who already have authorized Anthropic model access.", "developerName": "Codex Fable5 Maintainers", "category": "Productivity", "capabilities": ["Skills"], diff --git a/plugins/codex-fable5/bin/codex-fable5 b/plugins/codex-fable5/bin/codex-fable5 new file mode 100755 index 0000000..cc89156 --- /dev/null +++ b/plugins/codex-fable5/bin/codex-fable5 @@ -0,0 +1,53 @@ +#!/usr/bin/env sh +set -eu + +SCRIPT_DIR=$(CDPATH= cd "$(dirname "$0")" && pwd) +GOALS="$SCRIPT_DIR/../skills/codex-fable5/scripts/codex_goals.py" +FINDINGS="$SCRIPT_DIR/../skills/codex-fable5/scripts/codex_findings.py" + +usage() { + cat <<'EOF' +Usage: + codex-fable5 status + codex-fable5 goals [args...] + codex-fable5 findings [args...] + +Examples: + codex-fable5 status + codex-fable5 findings add --title "Missing verification" --evidence "No test evidence" + codex-fable5 findings gate + codex-fable5 goals next +EOF +} + +if [ "$#" -eq 0 ]; then + usage + exit 0 +fi + +case "$1" in + -h|--help|help) + usage + ;; + status) + python3 "$FINDINGS" status + if [ -f ".codex-fable5/goals.json" ]; then + python3 "$GOALS" status + else + printf '%s\n' "codex-fable5: no goal plan" + fi + ;; + findings|finding|f) + shift + exec python3 "$FINDINGS" "$@" + ;; + goals|goal|g) + shift + exec python3 "$GOALS" "$@" + ;; + *) + printf '%s\n' "codex-fable5: unknown command '$1'" >&2 + usage >&2 + exit 2 + ;; +esac diff --git a/plugins/codex-fable5/bin/codex-findings b/plugins/codex-fable5/bin/codex-findings new file mode 100755 index 0000000..cf53970 --- /dev/null +++ b/plugins/codex-fable5/bin/codex-findings @@ -0,0 +1,5 @@ +#!/usr/bin/env sh +set -eu + +SCRIPT_DIR=$(CDPATH= cd "$(dirname "$0")" && pwd) +exec python3 
"$SCRIPT_DIR/../skills/codex-fable5/scripts/codex_findings.py" "$@" diff --git a/plugins/codex-fable5/bin/codex-goals b/plugins/codex-fable5/bin/codex-goals new file mode 100755 index 0000000..f60af91 --- /dev/null +++ b/plugins/codex-fable5/bin/codex-goals @@ -0,0 +1,5 @@ +#!/usr/bin/env sh +set -eu + +SCRIPT_DIR=$(CDPATH= cd "$(dirname "$0")" && pwd) +exec python3 "$SCRIPT_DIR/../skills/codex-fable5/scripts/codex_goals.py" "$@" diff --git a/plugins/codex-fable5/skills/codex-fable5/SKILL.md b/plugins/codex-fable5/skills/codex-fable5/SKILL.md index 0e6c022..1136038 100644 --- a/plugins/codex-fable5/skills/codex-fable5/SKILL.md +++ b/plugins/codex-fable5/skills/codex-fable5/SKILL.md @@ -52,6 +52,7 @@ Use this skill to translate Fable-style behavior into Codex behavior. It does no - Prefer real tools, tests, rendered artifacts, and current sources over memory. - Implement the requested change, not only a proposal, unless the user clearly asks for analysis only. - Verify with the narrowest strong evidence that covers the requirement: tests, lint, typecheck, screenshots, command output, source inspection, or connector readback. + - For review-sensitive work, use `scripts/codex_findings.py` to track evidence-backed findings and require the findings gate to pass before final completion. - If verification fails, iterate once or more before handing the issue back. - Summarize what changed, what was verified, and any residual risk. @@ -102,5 +103,7 @@ Use the smallest durable surface that fits: ## Scripts - Run `scripts/codex_goals.py` for a local, stdlib-only multi-story ledger with evidence checkpoints and a final verification gate. +- Run `scripts/codex_findings.py` for a local, stdlib-only review findings ledger. Final `codex_goals.py` checkpoints fail while open or blocked findings remain. +- For user-facing terminal use from a checkout, add `plugins/codex-fable5/bin` to `PATH` and run `codex-fable5 status`, `codex-fable5 goals ...`, or `codex-fable5 findings ...`. 
- Run `scripts/fable_coverage.py --source /path/to/CLAUDE-FABLE-5.md` to verify that every source heading is accounted for in `references/coverage-matrix.md`. - Run `scripts/make_litellm_config.py` to generate a LiteLLM config for an Anthropic model alias. Use this only after confirming the user has a valid Anthropic key and model access. diff --git a/plugins/codex-fable5/skills/codex-fable5/references/operating-structure.md b/plugins/codex-fable5/skills/codex-fable5/references/operating-structure.md index a84c4d6..3659366 100644 --- a/plugins/codex-fable5/skills/codex-fable5/references/operating-structure.md +++ b/plugins/codex-fable5/skills/codex-fable5/references/operating-structure.md @@ -53,6 +53,13 @@ Review criteria: - Clues the explanation does not cover. - Length, scope, or format violations. +When a review finds actionable issues, track them as findings instead of relying on memory. Use the findings ledger for review-sensitive work: + +- Add only evidence-backed findings. +- Resolve accepted findings through the normal inspect, change, verify loop. +- Re-review only unresolved or materially changed areas. +- Run the findings gate before final completion when open findings may remain; final goal checkpoints also fail while blocking findings remain. + Keep the review narrow. Do not invent new standards during review, and do not rewrite a passing draft for taste. ## Writing And Research diff --git a/plugins/codex-fable5/skills/codex-fable5/references/provenance.md b/plugins/codex-fable5/skills/codex-fable5/references/provenance.md index 3c80502..f07e9e7 100644 --- a/plugins/codex-fable5/skills/codex-fable5/references/provenance.md +++ b/plugins/codex-fable5/skills/codex-fable5/references/provenance.md @@ -24,6 +24,7 @@ From `value-for-fable`: - Outcome-first readable communication. - Clue-first diagnosis and cheapest discriminating measurement. - Optional 2-pass review for high-cost misses. +- Evidence-backed findings closeout for review issues. 
- Long-session drift awareness. - Avoiding over-compression when readability and completeness matter. diff --git a/plugins/codex-fable5/skills/codex-fable5/references/task-routing.md b/plugins/codex-fable5/skills/codex-fable5/references/task-routing.md index 4379a82..9d788fe 100644 --- a/plugins/codex-fable5/skills/codex-fable5/references/task-routing.md +++ b/plugins/codex-fable5/skills/codex-fable5/references/task-routing.md @@ -11,22 +11,25 @@ This reference adapts the useful procedural ideas from `fablize` and `value-for- | HTML, CSS, SVG, game, canvas, chart, UI, animation, local app | Use verification grounding: run, observe, fix, re-run after changes. | | Diagnosis, architecture decision, product/technical tradeoff | Use VFF operating structure: conclusion first, clue-first hypothesis, cheapest discriminating measurement. | | High-stakes or deep unfamiliar domain | Suggest higher reasoning or stronger model; optionally use 2-pass review. | +| Review requested, failed/uncertain verification, security-sensitive change, or multi-file work with costly misses | Use the findings ledger and gate. | | Simple one-step edit or factual answer | Keep the normal Codex loop; do not add goal files or extra process. | ## Goal Ledger Use `scripts/codex_goals.py` when there are multiple dependent stories and the task benefits from resume-safe state. +For user-facing terminal use from a checkout, add `plugins/codex-fable5/bin` to `PATH` and use `codex-fable5 goals`. 
+ Example: ```bash -python3 plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py create --brief "Add CSV import" \ +codex-fable5 goals create --brief "Add CSV import" \ --goal "inspect::Find current import flow and tests" \ --goal "implement::Add CSV parser and UI path" \ --goal "verify::Run tests and a sample import" -python3 plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py next -python3 plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py checkpoint --id G001 --status complete --evidence "Read importer.ts and import.test.ts" -python3 plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py next +codex-fable5 goals next +codex-fable5 goals checkpoint --id G001 --status complete --evidence "Read importer.ts and import.test.ts" +codex-fable5 goals next ``` Rules: @@ -37,6 +40,40 @@ Rules: - On resume, run `status` first. - Store local state under `.codex-fable5/`; do not commit it unless the user asks. +## Findings Ledger + +Use `scripts/codex_findings.py` when review or verification produces evidence-backed issues that must not be lost before final completion. + +For user-facing terminal use from a checkout, add `plugins/codex-fable5/bin` to `PATH` and use `codex-fable5 findings`. + +Example: + +```bash +codex-fable5 findings add \ + --title "Final checkpoint can pass with unresolved review issues" \ + --severity high \ + --source subagent \ + --evidence "Review found that the final gate only checks tests, not accepted findings." +codex-fable5 findings next +codex-fable5 findings resolve \ + --id F001 \ + --evidence "Added a findings gate before final checkpoint." \ + --verify-cmd "python3 -m unittest discover -s tests -v" \ + --verify-evidence "all tests passed" +codex-fable5 findings gate +``` + +Rules: + +- Treat findings as accepted repair work, not brainstorming notes. +- Add only evidence-backed missing requirements, regressions, factual/source errors, failed checks, or unexplained clues. 
+- When a goal is active, new findings attach to that goal automatically unless `--goal` is provided. +- Resolve findings only after the normal inspect/change/verify loop produces resolution evidence and verification evidence. +- Run `gate` before completing a final goal checkpoint when accepted findings may remain. +- Final `codex-fable5 goals checkpoint --status complete` also fails while open or blocked findings remain. +- By default, `gate` fails on open or blocked findings. Use `--allow-blocked` only when the remaining blocked findings are explicitly accepted as residual risk. +- Store local state under `.codex-fable5/`; do not commit it unless the user asks. + ## Investigation Protocol For unknown-cause debugging: diff --git a/plugins/codex-fable5/skills/codex-fable5/scripts/codex_findings.py b/plugins/codex-fable5/skills/codex-fable5/scripts/codex_findings.py new file mode 100755 index 0000000..51f3fe6 --- /dev/null +++ b/plugins/codex-fable5/skills/codex-fable5/scripts/codex_findings.py @@ -0,0 +1,361 @@ +#!/usr/bin/env python3 +"""Codex Fable5 findings ledger for review-and-repair gates.""" + +from __future__ import annotations + +import argparse +import json +import re +import sys +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +STATE_DIR = Path(".codex-fable5") +GOALS_FILE = STATE_DIR / "goals.json" +FINDINGS_FILE = STATE_DIR / "findings.json" +LEDGER_FILE = STATE_DIR / "ledger.jsonl" + +OPEN_STATUSES = {"open"} +BLOCKING_STATUSES = {"open", "blocked"} +TERMINAL_STATUSES = {"resolved", "rejected"} +SEVERITY_ORDER = {"critical": 0, "high": 1, "medium": 2, "low": 3} + + +def now() -> str: + return datetime.now(timezone.utc).isoformat() + + +def write_json(path: Path, data: dict[str, Any]) -> None: + STATE_DIR.mkdir(exist_ok=True) + path.write_text(json.dumps(data, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") + + +def append_event(event: str, **fields: Any) -> None: + STATE_DIR.mkdir(exist_ok=True) + 
record = {"ts": now(), "event": event, **fields} + with LEDGER_FILE.open("a", encoding="utf-8") as handle: + handle.write(json.dumps(record, ensure_ascii=False) + "\n") + + +def load_findings() -> dict[str, Any]: + if not FINDINGS_FILE.exists(): + return {"created": now(), "findings": []} + data = json.loads(FINDINGS_FILE.read_text(encoding="utf-8")) + data.setdefault("findings", []) + return data + + +def save_findings(data: dict[str, Any]) -> None: + data["updated"] = now() + write_json(FINDINGS_FILE, data) + + +def load_goals() -> dict[str, Any] | None: + if not GOALS_FILE.exists(): + return None + return json.loads(GOALS_FILE.read_text(encoding="utf-8")) + + +def active_goal_id() -> str: + goals = load_goals() + if goals is None: + return "" + active = [goal for goal in goals.get("goals", []) if goal.get("status") == "in_progress"] + if len(active) != 1: + return "" + return str(active[0].get("id", "")) + + +def next_finding_id(findings: list[dict[str, Any]]) -> str: + max_seen = 0 + for finding in findings: + match = re.fullmatch(r"F(\d+)", str(finding.get("id", ""))) + if match: + max_seen = max(max_seen, int(match.group(1))) + return f"F{max_seen + 1:03d}" + + +def get_finding(data: dict[str, Any], finding_id: str) -> dict[str, Any]: + for finding in data["findings"]: + if finding.get("id") == finding_id: + return finding + sys.exit(f"codex-fable5: unknown finding id {finding_id}.") + + +def require_text(value: str, label: str) -> str: + text = value.strip() + if not text: + sys.exit(f"codex-fable5: {label} must be non-empty.") + return text + + +def sort_findings(findings: list[dict[str, Any]]) -> list[dict[str, Any]]: + return sorted( + findings, + key=lambda item: ( + SEVERITY_ORDER.get(str(item.get("severity", "medium")), 99), + str(item.get("id", "")), + ), + ) + + +def format_finding(finding: dict[str, Any]) -> str: + goal = f" goal={finding['goal']}" if finding.get("goal") else "" + location = f" location={finding['location']}" if 
finding.get("location") else "" + return ( + f"{finding['id']} [{finding['status']}] {finding['severity']} " + f"{finding['title']}{goal}{location}" + ) + + +def cmd_add(args: argparse.Namespace) -> None: + data = load_findings() + finding_id = next_finding_id(data["findings"]) + goal = args.goal.strip() or active_goal_id() + title = require_text(args.title, "--title") + evidence = require_text(args.evidence, "--evidence") + finding = { + "id": finding_id, + "goal": goal, + "title": title, + "severity": args.severity, + "source": args.source, + "status": "open", + "location": args.location.strip(), + "evidence": evidence, + "resolution": "", + "verify_cmd": "", + "verify_evidence": "", + "created": now(), + "updated": "", + } + data["findings"].append(finding) + save_findings(data) + append_event( + "finding_added", + id=finding_id, + goal=goal, + severity=args.severity, + source=args.source, + title=finding["title"], + ) + print(f"codex-fable5: added {finding_id}") + print(format_finding(finding)) + + +def cmd_list(args: argparse.Namespace) -> None: + data = load_findings() + findings = data["findings"] + if args.status: + findings = [finding for finding in findings if finding.get("status") == args.status] + if args.goal: + findings = [finding for finding in findings if finding.get("goal") == args.goal] + + if not findings: + print("codex-fable5: no findings") + return + + for finding in sort_findings(findings): + print(format_finding(finding)) + if args.verbose: + print(f" evidence: {finding.get('evidence', '')}") + if finding.get("resolution"): + print(f" resolution: {finding['resolution']}") + if finding.get("verify_cmd"): + print(f" verify_cmd: {finding['verify_cmd']}") + if finding.get("verify_evidence"): + print(f" verify_evidence: {finding['verify_evidence']}") + + +def cmd_next(args: argparse.Namespace) -> None: + data = load_findings() + findings = [finding for finding in data["findings"] if finding.get("status") == "open"] + if args.goal: + findings = 
[finding for finding in findings if finding.get("goal") == args.goal] + if not findings: + print("codex-fable5: no open findings") + return + + finding = sort_findings(findings)[0] + print(f"=== codex-fable5 finding: {finding['id']} {finding['title']}") + print(f"Severity: {finding['severity']}") + if finding.get("goal"): + print(f"Goal: {finding['goal']}") + if finding.get("location"): + print(f"Location: {finding['location']}") + print(f"Evidence: {finding['evidence']}") + print( + f"On resolution: codex-fable5 findings resolve --id {finding['id']} " + '--evidence "" --verify-evidence ""' + ) + + +def cmd_resolve(args: argparse.Namespace) -> None: + data = load_findings() + finding = get_finding(data, args.id) + if finding["status"] not in {"open", "blocked"}: + sys.exit(f"codex-fable5: {args.id} is {finding['status']}; reopen it first.") + + evidence = require_text(args.evidence, "--evidence") + verify_evidence = require_text(args.verify_evidence, "--verify-evidence") + finding["status"] = "resolved" + finding["resolution"] = evidence + finding["verify_cmd"] = args.verify_cmd.strip() + finding["verify_evidence"] = verify_evidence + finding["updated"] = now() + save_findings(data) + append_event( + "finding_resolved", + id=args.id, + goal=finding.get("goal", ""), + verify_cmd=finding["verify_cmd"], + verify_evidence=finding["verify_evidence"], + ) + print(f"codex-fable5: {args.id} -> resolved") + + +def cmd_reject(args: argparse.Namespace) -> None: + data = load_findings() + finding = get_finding(data, args.id) + if finding["status"] in TERMINAL_STATUSES: + sys.exit(f"codex-fable5: {args.id} is already {finding['status']}.") + + reason = require_text(args.reason, "--reason") + finding["status"] = "rejected" + finding["resolution"] = reason + finding["updated"] = now() + save_findings(data) + append_event("finding_rejected", id=args.id, goal=finding.get("goal", ""), reason=reason) + print(f"codex-fable5: {args.id} -> rejected") + + +def cmd_block(args: 
argparse.Namespace) -> None: + data = load_findings() + finding = get_finding(data, args.id) + if finding["status"] in TERMINAL_STATUSES: + sys.exit(f"codex-fable5: {args.id} is already {finding['status']}.") + + reason = require_text(args.reason, "--reason") + finding["status"] = "blocked" + finding["resolution"] = reason + finding["updated"] = now() + save_findings(data) + append_event("finding_blocked", id=args.id, goal=finding.get("goal", ""), reason=reason) + print(f"codex-fable5: {args.id} -> blocked") + + +def cmd_reopen(args: argparse.Namespace) -> None: + data = load_findings() + finding = get_finding(data, args.id) + previous_status = finding["status"] + finding["status"] = "open" + finding["resolution"] = "" + finding["verify_cmd"] = "" + finding["verify_evidence"] = "" + finding["updated"] = now() + save_findings(data) + append_event("finding_reopened", id=args.id, previous_status=previous_status) + print(f"codex-fable5: {args.id} reopened from {previous_status}") + + +def cmd_gate(args: argparse.Namespace) -> None: + data = load_findings() + blocking_statuses = OPEN_STATUSES if args.allow_blocked else BLOCKING_STATUSES + blockers = [ + finding + for finding in data["findings"] + if finding.get("status") in blocking_statuses + and (not args.goal or finding.get("goal") == args.goal) + ] + blockers = sort_findings(blockers) + if blockers: + print(f"codex-fable5: findings gate failed; {len(blockers)} blocking findings remain") + for finding in blockers: + print(f" {format_finding(finding)}") + sys.exit(1) + + scope = f" for {args.goal}" if args.goal else "" + print(f"codex-fable5: findings gate passed{scope}") + + +def cmd_status(_: argparse.Namespace) -> None: + data = load_findings() + counts = { + status: sum(1 for finding in data["findings"] if finding.get("status") == status) + for status in ["open", "blocked", "resolved", "rejected"] + } + summary = ", ".join(f"{count} {status}" for status, count in counts.items() if count) + if not summary: + 
summary = "0 findings" + print(f"codex-fable5: {summary}") + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(prog="codex-fable5 findings") + sub = parser.add_subparsers(dest="command", required=True) + + add = sub.add_parser("add") + add.add_argument("--title", required=True) + add.add_argument("--evidence", required=True) + add.add_argument("--severity", choices=["low", "medium", "high", "critical"], default="medium") + add.add_argument( + "--source", + choices=["main", "subagent", "test", "user", "review", "command"], + default="main", + ) + add.add_argument("--goal", default="") + add.add_argument("--location", default="") + + list_cmd = sub.add_parser("list") + list_cmd.add_argument("--status", choices=["open", "blocked", "resolved", "rejected"]) + list_cmd.add_argument("--goal", default="") + list_cmd.add_argument("--verbose", action="store_true") + + next_cmd = sub.add_parser("next") + next_cmd.add_argument("--goal", default="") + + resolve = sub.add_parser("resolve") + resolve.add_argument("--id", required=True) + resolve.add_argument("--evidence", required=True) + resolve.add_argument("--verify-evidence", required=True) + resolve.add_argument("--verify-cmd", dest="verify_cmd", default="") + + reject = sub.add_parser("reject") + reject.add_argument("--id", required=True) + reject.add_argument("--reason", required=True) + + block = sub.add_parser("block") + block.add_argument("--id", required=True) + block.add_argument("--reason", required=True) + + reopen = sub.add_parser("reopen") + reopen.add_argument("--id", required=True) + + gate = sub.add_parser("gate") + gate.add_argument("--goal", default="") + gate.add_argument("--allow-blocked", action="store_true") + + sub.add_parser("status") + return parser + + +def main() -> int: + args = build_parser().parse_args() + handlers = { + "add": cmd_add, + "list": cmd_list, + "next": cmd_next, + "resolve": cmd_resolve, + "reject": cmd_reject, + "block": cmd_block, + "reopen": 
cmd_reopen, + "gate": cmd_gate, + "status": cmd_status, + } + handlers[args.command](args) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py b/plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py index aeb223d..dffe45a 100755 --- a/plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py +++ b/plugins/codex-fable5/skills/codex-fable5/scripts/codex_goals.py @@ -12,9 +12,11 @@ STATE_DIR = Path(".codex-fable5") GOALS_FILE = STATE_DIR / "goals.json" +FINDINGS_FILE = STATE_DIR / "findings.json" LEDGER_FILE = STATE_DIR / "ledger.jsonl" OPEN_STATUSES = {"pending", "in_progress"} INCOMPLETE_TERMINAL_STATUSES = {"failed", "blocked"} +BLOCKING_FINDING_STATUSES = {"open", "blocked"} def now() -> str: @@ -70,6 +72,17 @@ def terminal_incomplete_goals(goals: list[dict[str, Any]]) -> list[dict[str, Any return [goal for goal in goals if goal["status"] in INCOMPLETE_TERMINAL_STATUSES] +def blocking_findings() -> list[dict[str, Any]]: + if not FINDINGS_FILE.exists(): + return [] + data = json.loads(FINDINGS_FILE.read_text(encoding="utf-8")) + return [ + finding + for finding in data.get("findings", []) + if finding.get("status") in BLOCKING_FINDING_STATUSES + ] + + def cmd_create(args: argparse.Namespace) -> None: if GOALS_FILE.exists() and not args.force: sys.exit("codex-fable5: plan already exists. 
Use `status` or replace it with --force.") @@ -118,7 +131,7 @@ def cmd_next(_: argparse.Namespace) -> None: print(f"Objective: {goal['objective']}") print("Rule: work this story only and produce concrete evidence.") command = ( - f"codex_goals.py checkpoint --id {goal['id']} --status complete " + f"codex-fable5 goals checkpoint --id {goal['id']} --status complete " '--evidence ""' ) if is_final: @@ -143,6 +156,14 @@ def cmd_checkpoint(args: argparse.Namespace) -> None: sys.exit("codex-fable5: complete checkpoints require non-empty --evidence.") if goal["id"] == plan["goals"][-1]["id"] and not (verify_cmd and verify_evidence): sys.exit("codex-fable5: final story requires --verify-cmd and --verify-evidence.") + if goal["id"] == plan["goals"][-1]["id"]: + findings = blocking_findings() + if findings: + ids = ", ".join(str(finding.get("id", "?")) for finding in findings) + sys.exit( + "codex-fable5: final story requires findings gate; " + f"{len(findings)} blocking findings remain ({ids})." + ) goal["status"] = args.status goal["evidence"] = evidence @@ -189,7 +210,7 @@ def cmd_status(_: argparse.Namespace) -> None: def build_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(prog="codex_goals.py") + parser = argparse.ArgumentParser(prog="codex-fable5 goals") sub = parser.add_subparsers(dest="command", required=True) create = sub.add_parser("create") diff --git a/tests/test_scripts.py b/tests/test_scripts.py index 5177117..f45c24f 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -2,6 +2,7 @@ import importlib.util import json +import os import subprocess import sys import tempfile @@ -13,6 +14,7 @@ ROOT = Path(__file__).resolve().parents[1] SKILL_ROOT = ROOT / "plugins" / "codex-fable5" / "skills" / "codex-fable5" SCRIPTS = SKILL_ROOT / "scripts" +BIN = ROOT / "plugins" / "codex-fable5" / "bin" def load_script(name: str): @@ -29,6 +31,7 @@ class ScriptTests(unittest.TestCase): @classmethod def setUpClass(cls) -> None: 
# Test methods of ``ScriptTests`` (the class header lies outside this chunk).
# They rely on the module-level ROOT, SCRIPTS and BIN paths defined in this file.


def test_user_facing_wrappers_run_from_path(self) -> None:
    """Check the ``bin`` wrappers exist and ``codex-fable5`` works via PATH.

    Every wrapper must be an executable file that passes ``sh -n``. The
    ``codex-fable5`` command is then run by bare name from an empty temporary
    directory, with BIN prepended to PATH, through a status / goals /
    findings round trip.
    """
    # PATH can be missing in a stripped-down environment; fall back to the
    # platform default instead of raising KeyError before any assertion runs.
    search_path = os.environ.get("PATH", os.defpath)
    env = {**os.environ, "PATH": f"{BIN}{os.pathsep}{search_path}"}

    for command in ["codex-fable5", "codex-findings", "codex-goals"]:
        with self.subTest(command=command):
            wrapper = BIN / command
            self.assertTrue(wrapper.is_file())
            self.assertTrue(os.access(wrapper, os.X_OK))

            # `sh -n` parses the script without executing it.
            syntax = subprocess.run(
                ["sh", "-n", str(wrapper)],
                cwd=ROOT,
                text=True,
                capture_output=True,
                check=False,
            )
            self.assertEqual(syntax.returncode, 0, syntax.stderr)

    # NOTE(review): the smoke run is assumed to follow the loop (it never uses
    # `command`); confirm against the original indentation.
    with tempfile.TemporaryDirectory() as tmp:

        def run(*args: str) -> subprocess.CompletedProcess[str]:
            # Bare command name on purpose: the wrapper is meant to be found
            # through the PATH entry added to `env` above.
            return subprocess.run(
                ["codex-fable5", *args],
                cwd=tmp,
                env=env,
                text=True,
                capture_output=True,
                check=False,
            )

        status = run("status")
        self.assertEqual(status.returncode, 0, status.stderr)
        self.assertIn("0 findings", status.stdout)
        self.assertIn("no goal plan", status.stdout)

        created = run(
            "goals",
            "create",
            "--brief",
            "Wrapper smoke",
            "--goal",
            "inspect::Check wrapper path",
        )
        self.assertEqual(created.returncode, 0, created.stderr)
        self.assertIn("plan created", created.stdout)

        started = run("goals", "next")
        self.assertEqual(started.returncode, 0, started.stderr)

        added = run(
            "findings",
            "add",
            "--title",
            "Wrapper finding",
            "--evidence",
            "PATH wrapper should call the findings script.",
        )
        self.assertEqual(added.returncode, 0, added.stderr)
        self.assertIn("goal=G001", added.stdout)

        status_with_plan = run("status")
        self.assertEqual(status_with_plan.returncode, 0, status_with_plan.stderr)
        self.assertIn("1 open", status_with_plan.stdout)
        self.assertIn("0/1 complete", status_with_plan.stdout)


def test_findings_flow_blocks_gate_until_resolved(self) -> None:
    """Check that the findings gate fails while F001 is open and passes once resolved.

    Also checks that ``resolve`` without verification is rejected with a
    message mentioning ``verify-evidence``.
    """
    script = SCRIPTS / "codex_findings.py"
    with tempfile.TemporaryDirectory() as tmp:
        cwd = Path(tmp)

        def run(*args: str) -> subprocess.CompletedProcess[str]:
            return subprocess.run(
                [sys.executable, str(script), *args],
                cwd=cwd,
                text=True,
                capture_output=True,
                check=False,
            )

        added = run(
            "add",
            "--title",
            "Final gate ignores unresolved review issue",
            "--severity",
            "high",
            "--source",
            "subagent",
            "--evidence",
            "Review found an accepted finding without a closeout gate.",
        )
        self.assertEqual(added.returncode, 0, added.stderr)
        self.assertIn("added F001", added.stdout)

        gate_failed = run("gate")
        self.assertNotEqual(gate_failed.returncode, 0)
        self.assertIn("findings gate failed", gate_failed.stdout)
        self.assertIn("F001 [open] high", gate_failed.stdout)

        # Resolving with closure evidence only must be refused.
        missing_verification = run(
            "resolve",
            "--id",
            "F001",
            "--evidence",
            "Added findings gate.",
        )
        self.assertNotEqual(missing_verification.returncode, 0)
        self.assertIn("verify-evidence", missing_verification.stderr)

        resolved = run(
            "resolve",
            "--id",
            "F001",
            "--evidence",
            "Added findings gate.",
            "--verify-cmd",
            "python3 -m unittest discover -s tests -v",
            "--verify-evidence",
            "targeted tests passed",
        )
        self.assertEqual(resolved.returncode, 0, resolved.stderr)

        gate_passed = run("gate")
        self.assertEqual(gate_passed.returncode, 0, gate_passed.stderr)
        self.assertIn("findings gate passed", gate_passed.stdout)

        status = run("status")
        self.assertEqual(status.returncode, 0, status.stderr)
        self.assertIn("1 resolved", status.stdout)


def test_findings_auto_attach_to_active_goal_and_blocked_gate_policy(self) -> None:
    """Check goal auto-attachment and how the gate treats blocked findings.

    A finding added without ``--goal`` while G001 is active must be stored
    with ``goal == "G001"``. Once blocked, it fails the default gate and
    passes only with ``--allow-blocked``.
    """
    goals_script = SCRIPTS / "codex_goals.py"
    findings_script = SCRIPTS / "codex_findings.py"
    with tempfile.TemporaryDirectory() as tmp:
        cwd = Path(tmp)

        def run(script: Path, *args: str) -> subprocess.CompletedProcess[str]:
            return subprocess.run(
                [sys.executable, str(script), *args],
                cwd=cwd,
                text=True,
                capture_output=True,
                check=False,
            )

        self.assertEqual(
            run(
                goals_script,
                "create",
                "--brief",
                "Smoke",
                "--goal",
                "inspect::Check state",
            ).returncode,
            0,
        )
        self.assertEqual(run(goals_script, "next").returncode, 0)

        added = run(
            findings_script,
            "add",
            "--title",
            "Unresolved issue tied to active goal",
            "--evidence",
            "The active goal should be inferred when no --goal is provided.",
        )
        self.assertEqual(added.returncode, 0, added.stderr)
        self.assertIn("goal=G001", added.stdout)

        # Read with an explicit encoding so the check does not depend on the
        # platform's default locale.
        findings_path = cwd / ".codex-fable5" / "findings.json"
        findings = json.loads(findings_path.read_text(encoding="utf-8"))
        self.assertEqual(findings["findings"][0]["goal"], "G001")

        blocked = run(
            findings_script,
            "block",
            "--id",
            "F001",
            "--reason",
            "Needs user decision.",
        )
        self.assertEqual(blocked.returncode, 0, blocked.stderr)

        default_gate = run(findings_script, "gate")
        self.assertNotEqual(default_gate.returncode, 0)
        self.assertIn("F001 [blocked]", default_gate.stdout)

        allow_blocked_gate = run(findings_script, "gate", "--allow-blocked")
        self.assertEqual(allow_blocked_gate.returncode, 0, allow_blocked_gate.stderr)


def test_goal_final_checkpoint_requires_findings_gate(self) -> None:
    """Check that the final goal checkpoint is refused while a finding is open.

    The same ``checkpoint`` command must fail (naming F001 on stderr) before
    the finding is resolved and succeed afterwards.
    """
    goals_script = SCRIPTS / "codex_goals.py"
    findings_script = SCRIPTS / "codex_findings.py"
    with tempfile.TemporaryDirectory() as tmp:
        cwd = Path(tmp)

        def run(script: Path, *args: str) -> subprocess.CompletedProcess[str]:
            return subprocess.run(
                [sys.executable, str(script), *args],
                cwd=cwd,
                text=True,
                capture_output=True,
                check=False,
            )

        # Identical arguments are used before and after resolving, so the
        # only thing that changes between the two attempts is the finding.
        final_checkpoint = (
            "checkpoint",
            "--id",
            "G001",
            "--status",
            "complete",
            "--evidence",
            "final evidence",
            "--verify-cmd",
            "smoke",
            "--verify-evidence",
            "accepted",
        )

        created = run(
            goals_script,
            "create",
            "--brief",
            "Smoke",
            "--goal",
            "verify::Confirm final state",
        )
        self.assertEqual(created.returncode, 0, created.stderr)
        self.assertEqual(run(goals_script, "next").returncode, 0)

        added = run(
            findings_script,
            "add",
            "--title",
            "Open review issue",
            "--evidence",
            "The final checkpoint should fail while this is open.",
        )
        self.assertEqual(added.returncode, 0, added.stderr)

        blocked_checkpoint = run(goals_script, *final_checkpoint)
        self.assertNotEqual(blocked_checkpoint.returncode, 0)
        self.assertIn("final story requires findings gate", blocked_checkpoint.stderr)
        self.assertIn("F001", blocked_checkpoint.stderr)

        resolved = run(
            findings_script,
            "resolve",
            "--id",
            "F001",
            "--evidence",
            "Closed the review issue.",
            "--verify-evidence",
            "manual verification accepted",
        )
        self.assertEqual(resolved.returncode, 0, resolved.stderr)

        complete = run(goals_script, *final_checkpoint)
        self.assertEqual(complete.returncode, 0, complete.stderr)