azalio · azalio · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026
diff --git a/.claude/skills/map-plan/SKILL.md b/.claude/skills/map-plan/SKILL.md
@@ -1,7 +1,7 @@
 ---
 name: map-plan
 description: |
-  ARCHITECT phase only: decompose a complex task into atomic subtasks via task-decomposer. Use when starting a feature, refactor, or complex bug fix and you need a plan first. Do NOT use to execute work; use map-task or map-efficient.
+  ARCHITECT phase only: produce an upfront plan by decomposing a complex task into atomic subtasks with clear dependencies, via task-decomposer. Use when the user asks to plan, create a structured plan, break down, decompose, or stage work — e.g. planning a feature, refactor, migration, API/versioning upgrade, or incremental/phased rollout into smaller independent steps before any code is written. Trigger on phrasing like "plan a…", "create a plan for…", "decompose…into tasks", "break this into steps", "roll out incrementally", or "smaller independent steps". Do NOT use to execute work; use map-task or map-efficient.
 effort: high
 argument-hint: "[task description]"
 ---

diff --git a/.claude/skills/map-skill-eval/SKILL.md b/.claude/skills/map-skill-eval/SKILL.md
@@ -87,6 +87,60 @@ mapify skill-eval run map-plan --eval-set .map/evals/map-plan.json --resume
 - **Run log not found for `--resume`** — `--resume` looks for the latest `.map/eval-runs/<skill>/<timestamp>.jsonl`. If no prior run exists, omit `--resume` to start fresh.
 - **All cases report `not_trigger` unexpectedly** — verify the skill name matches exactly (e.g. `map-plan`, not `map_plan`) and that `.claude/` was seeded correctly in the temp cwd.
 
+## Optimize a skill description
+
+Anti-overfit description optimizer: deterministic 60/40 train/test split, up to N iterations (iteration 0 = baseline = current description). Selects the candidate with the highest held-out TEST pass-rate; an overfit candidate (train pass-rate up, test pass-rate down) is flagged and never selected.
+
+```bash
+mapify skill-eval optimize <skill> --eval-set PATH [--iterations N] [--apply] [--open] [--dry-run]
+```
+
+- `<skill>` — skill to optimize (e.g. `map-plan`).
+- `--eval-set PATH` — eval-set JSON with `>= 5` entries (a 60/40 split needs `n_test >= 3`; a smaller set exits with code 2, spending zero quota).
+- `--iterations N` — maximum optimization iterations (default: 5). Iteration 0 is the baseline.
+- `--apply` — patch the winning description into the SKILL.md frontmatter `description:` of `templates_src/skills/<skill>/SKILL.md.jinja` and re-render so generated trees stay byte-identical; the change is staged, not committed. `skill-rules.json` `description` is NOT auto-patched (update it by hand). Two no-op cases: "No improvement found" (baseline already optimal) and "Winner identical to current".
+- `--open` — open the HTML report in the browser after the run (best-effort; never errors the run).
+- `--dry-run` — print the planned call budget (iterations × (n_train + n_test) dispatch calls + iterations proposer calls) and `model: default (resolved by claude CLI)`, then exit 0 spending zero quota.
+
+Writes a durable `OptimizeResult` JSON and an HTML report to `.map/eval-runs/<skill>/<timestamp>-optimize.json` and `<timestamp>-optimize.html`.
+
+Default mode is propose-only: nothing outside `.map/` is modified.
+
+### Examples
+
+```bash
+# Preview quota usage without spending any
+mapify skill-eval optimize map-plan --eval-set .map/evals/map-plan.json --dry-run
+
+# Run 3 optimization iterations and open the HTML report
+mapify skill-eval optimize map-plan --eval-set .map/evals/map-plan.json --iterations 3 --open
+
+# Run, then auto-apply the winning description if an improvement is found
+mapify skill-eval optimize map-plan --eval-set .map/evals/map-plan.json --apply
+```
+
+## View an optimization report
+
+Renders the latest (or a specified `--result`) stored `OptimizeResult` JSON as an HTML report.
+
+```bash
+mapify skill-eval view <skill> [--result PATH] [--open]
+```
+
+- `<skill>` — skill whose optimization results to view.
+- `--result PATH` — path to a specific `*-optimize.json` result file; defaults to the latest in `.map/eval-runs/<skill>/`.
+- `--open` — open the rendered HTML report in the browser.
+
+### Examples
+
+```bash
+# View the latest optimization report for map-plan
+mapify skill-eval view map-plan
+
+# Open a specific result file in the browser
+mapify skill-eval view map-plan --result .map/eval-runs/map-plan/20260601T120000Z-optimize.json --open
+```
+
 ## Related Commands
 
 - `/map-plan` — plan and decompose tasks.

diff --git a/.claude/skills/skill-rules.json b/.claude/skills/skill-rules.json
@@ -244,11 +244,11 @@
       "skillClass": "task",
       "enforcement": "manual",
       "priority": "medium",
-      "description": "Evaluate a /map-* skill's trigger accuracy + cost via mapify skill-eval (claude -p matrix, deterministic assertions, durable resumable runs).",
+      "description": "Evaluate a /map-* skill's trigger accuracy + cost, OR optimize its description via anti-overfit held-out selection, via mapify skill-eval (claude -p matrix, deterministic assertions, durable resumable runs).",
       "requires-cmd": ["claude"],
       "promptTriggers": {
-        "keywords": ["map-skill-eval","skill-eval","skill eval","evaluate skill","trigger accuracy","skill triggering"],
-        "intentPatterns": ["map-skill-eval","(eval|evaluate|measure|test).*(skill).*(trigger|fire|cost)","does .* skill trigger"]
+        "keywords": ["map-skill-eval","skill-eval","skill eval","evaluate skill","trigger accuracy","skill triggering","optimize skill","skill optimize","description optimizer","optimize description"],
+        "intentPatterns": ["map-skill-eval","(eval|evaluate|measure|test).*(skill).*(trigger|fire|cost)","does .* skill trigger","(optimize|improve).*(skill).*(description|trigger)"]
       }
     },
     "map-task": {

diff --git a/.map/scripts/map_orchestrator.py b/.map/scripts/map_orchestrator.py
@@ -2166,6 +2166,29 @@ def _is_cross_repo_path(p: str) -> bool:
             diff_paths = set()
         if diff_paths:
             files_not_in_diff = [p for p in declared if p not in diff_paths]
+        # Gitignored deliverables (e.g. .map/ workflow artifacts like spike
+        # docs or eval-run .jsonl) never appear in git diff/status by design —
+        # that is NOT Actor truncation. Drop any declared path that
+        # `git check-ignore` reports as ignored so it does not raise a false
+        # "Possible Actor truncation" warning. A gitignored file that is also
+        # missing from disk is still flagged separately via missing_files.
+        if files_not_in_diff:
+            try:
+                igproc = _sp.run(
+                    ["git", "check-ignore", "--", *files_not_in_diff],
+                    cwd=project_dir, capture_output=True, text=True, timeout=5,
+                )
+                ignored = {
+                    line.strip()
+                    for line in igproc.stdout.splitlines()
+                    if line.strip()
+                }
+                if ignored:
+                    files_not_in_diff = [
+                        p for p in files_not_in_diff if p not in ignored
+                    ]
+            except (OSError, _sp.TimeoutExpired):
+                pass
 
     state.record_subtask_result(
         subtask_id,

diff --git a/src/mapify_cli/__init__.py b/src/mapify_cli/__init__.py
@@ -1565,6 +1565,229 @@ def validate_graph(
         raise typer.Exit(2)
 
 
+def _open_best_effort(path: Path) -> None:
+    """Open *path* in the default browser — swallow any error (VC5/SC-2).
+
+    Args:
+        path: Report file to open; may be relative to the current directory.
+
+    The path is resolved to an absolute one before the file URI is built:
+    ``Path.as_uri()`` raises ``ValueError`` for a relative path, and the
+    best-effort handler below would swallow that error, turning ``--open``
+    into a silent no-op whenever a relative ``--result`` path was supplied.
+    """
+    import webbrowser  # lazy import: optional use-path
+
+    try:
+        webbrowser.open(path.resolve().as_uri())
+    except Exception:  # noqa: BLE001
+        pass  # SC-2: never errors the run
+
+
+def _read_skill_description(root: Path, skill: str) -> str:
+    """Return the description: field from SKILL.md frontmatter, or '' on any failure.
+
+    Args:
+        root: Directory that contains the ``.claude/skills`` tree.
+        skill: Skill directory name, e.g. ``map-plan``.
+
+    Returns:
+        The description converted to ``str``. The empty string is returned
+        when the file is missing, does not start with a ``---`` fence, has no
+        closing fence, carries no (or a null) description, or when reading
+        or parsing raises.
+    """
+    skill_md = root / ".claude" / "skills" / skill / "SKILL.md"
+    if not skill_md.exists():
+        return ""
+    try:
+        from mapify_cli.skill_ir import parse_frontmatter  # lazy import
+
+        text = skill_md.read_text(encoding="utf-8")
+        if not text.startswith("---\n"):
+            return ""
+        # Start after the 4-character opening fence when looking for the
+        # closing one.
+        close = text.find("\n---", 4)
+        if close == -1:
+            return ""
+        parsed = parse_frontmatter(text[4:close])
+        description = parsed.get("description")
+        # A key that is present but null must not become the literal string
+        # "None" (presumably what an empty `description:` parses to — the
+        # parser is not visible here, so this is handled defensively).
+        return "" if description is None else str(description)
+    except Exception:  # noqa: BLE001
+        return ""
+
+
+# ---------------------------------------------------------------------------
+# skill-eval optimize
+# ---------------------------------------------------------------------------
+
+# Smallest eval-set that `skill-eval optimize` accepts; a shorter set exits
+# with code 2 before the dry-run budget is printed or the claude CLI is
+# looked up.
+_OPTIMIZE_MIN_ENTRIES: int = 5
+
+
+@skill_eval_app.command("optimize")
+def skill_eval_optimize(
+    skill: str = typer.Argument(..., help="Skill under optimisation, e.g. map-plan"),
+    eval_set: Optional[Path] = typer.Option(
+        None, "--eval-set", help="Path to eval-set JSON"
+    ),
+    iterations: int = typer.Option(
+        5, "--iterations", min=1, help="Total iterations including baseline (default 5)"
+    ),
+    apply: bool = typer.Option(
+        False, "--apply", help="Apply the winning description back to the .jinja source"
+    ),
+    open_html: bool = typer.Option(
+        False, "--open", help="Open the HTML report in the default browser"
+    ),
+    dry_run: bool = typer.Option(
+        False, "--dry-run", help="Print planned call budget; spend nothing, no dispatcher"
+    ),
+) -> None:
+    """Optimise a skill's trigger description via repeated eval iterations.
+
+    Validation (eval-set given, loadable, large enough) always runs first.
+    With --dry-run the planned call budget is printed and the command exits
+    before the claude CLI is looked up or anything is written. A real run
+    stores the result as JSON plus an HTML report under
+    .map/eval-runs/<skill>/.
+
+    Exit codes:
+      0 - Success (or dry-run completed)
+      1 - Runtime error (claude not found)
+      2 - Validation error (missing --eval-set, unreadable or malformed
+          eval-set, or < 5 entries)
+    """
+    import json  # lazy — keep top-level import time low
+
+    import mapify_cli.skills_eval.runner as _runner
+    from datetime import timezone
+
+    # 1. --eval-set is required.
+    if eval_set is None:
+        console.print("[bold red]Error:[/bold red] provide --eval-set PATH")
+        raise typer.Exit(2)
+
+    # 2. Load and validate eval-set. OSError is caught alongside ValueError so
+    #    that an unreadable or missing path is reported as a validation error
+    #    (exit 2) rather than escaping as a traceback.
+    try:
+        entries = _runner.load_eval_set(eval_set)
+    except (OSError, ValueError) as exc:
+        console.print(f"[bold red]Error:[/bold red] {exc}")
+        raise typer.Exit(2) from exc
+
+    # 3. MIN-SIZE guard — BEFORE dry-run and BEFORE any dispatcher (VC2).
+    if len(entries) < _OPTIMIZE_MIN_ENTRIES:
+        console.print(
+            f"[bold red]Error:[/bold red] eval-set has {len(entries)} "
+            f"{'entry' if len(entries) == 1 else 'entries'}; "
+            f"optimize requires >= {_OPTIMIZE_MIN_ENTRIES} entries"
+        )
+        raise typer.Exit(2)
+
+    # 4. DRY-RUN — print budget, exit 0, construct NO dispatcher (VC1).
+    if dry_run:
+        from mapify_cli.skills_eval.description_optimizer import (
+            _DEFAULT_SEED,
+            split_train_test,
+        )
+
+        train, test = split_train_test(entries, _DEFAULT_SEED)
+        n_train = len(train)
+        n_test = len(test)
+        # Every iteration dispatches each train and each test entry once.
+        total_dispatches = iterations * (n_train + n_test)
+        console.print(
+            f"[bold]Dry-run:[/bold] "
+            f"{iterations} x ({n_train}+{n_test}) = [cyan]{total_dispatches}[/cyan] "
+            f"dispatch calls + [cyan]{iterations}[/cyan] proposer calls"
+        )
+        console.print("model: default (resolved by claude CLI)")
+        raise typer.Exit(0)
+
+    # 5. CLAUDE CHECK — require claude BEFORE any invocation (VC3).
+    if shutil.which("claude") is None:
+        console.print(
+            "[bold red]Error:[/bold red] requires-cmd: claude — "
+            "install the claude CLI and ensure it is on PATH"
+        )
+        raise typer.Exit(1)
+
+    # 6. REAL RUN.
+    import mapify_cli.skills_eval.proposer as _proposer
+    from mapify_cli.skills_eval.description_optimizer import optimize
+    from mapify_cli.skills_eval.viewer import render_to_path
+
+    root = Path.cwd()
+    out_dir = root / ".map" / "eval-runs" / skill
+    out_dir.mkdir(parents=True, exist_ok=True)
+    # UTC timestamp shared by the JSON and HTML artifact names of this run.
+    run_ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
+
+    current_description = _read_skill_description(root, skill)
+    if not current_description:
+        # The reader returns '' on any failure; say so instead of silently
+        # optimising against an empty baseline.
+        console.print(
+            f"[yellow]Warning:[/yellow] no description found for skill "
+            f"[bold]{skill}[/bold]; the baseline description is empty"
+        )
+
+    result = optimize(
+        skill=skill,
+        entries=entries,
+        current_description=current_description,
+        proposer=_proposer.propose_description,
+        dispatcher=None,  # NOTE(review): presumably selects optimize()'s default — confirm
+        source_claude_dir=root / ".claude",
+        out_dir=out_dir,
+        run_ts=run_ts,
+        iterations=iterations,
+    )
+
+    json_path = out_dir / f"{run_ts}-optimize.json"
+    html_path = out_dir / f"{run_ts}-optimize.html"
+    json_path.write_text(json.dumps(result.to_dict(), indent=2), encoding="utf-8")
+    render_to_path(result, html_path)
+
+    if result.no_improvement:
+        status_label = "no improvement"
+    else:
+        status_label = f"iter {result.winning_iteration}"
+    # The selected iteration supplies the reported test pass-rate; 0.0 is
+    # shown when no iteration is marked as selected.
+    winner_iter = next(
+        (it for it in result.iterations if it.selected),
+        None,
+    )
+    test_pass_rate = winner_iter.test_pass_rate if winner_iter is not None else 0.0
+    console.print(
+        f"[bold]Optimize complete:[/bold] skill=[bold]{skill}[/bold] "
+        f"winner=[cyan]{status_label}[/cyan] "
+        f"test_pass_rate=[cyan]{test_pass_rate:.1%}[/cyan]"
+    )
+    console.print(f"  artifact: [cyan]{json_path}[/cyan]")
+
+    if apply:
+        from mapify_cli.skills_eval.apply_patcher import apply_optimized_description
+
+        apply_optimized_description(
+            skill=skill,
+            winner=result.winning_description,
+            current_description=current_description,
+            no_improvement=result.no_improvement,
+            repo_root=root,
+            stage=True,
+        )
+
+    if open_html:
+        _open_best_effort(html_path)
+
+
+# ---------------------------------------------------------------------------
+# skill-eval view
+# ---------------------------------------------------------------------------
+
+
+@skill_eval_app.command("view")
+def skill_eval_view(
+    skill: str = typer.Argument(..., help="Skill whose optimization result to view"),
+    result_path: Optional[Path] = typer.Option(
+        None, "--result", help="Path to a specific *-optimize.json file"
+    ),
+    open_html: bool = typer.Option(
+        False, "--open", help="Open the HTML report in the default browser"
+    ),
+) -> None:
+    """Render the latest (or specified) optimize result as an HTML report.
+
+    The HTML file is written next to the JSON result, with the suffix
+    swapped to .html.
+
+    Exit codes:
+      0 - Success
+      2 - No optimize result found, or the result file cannot be read/parsed
+    """
+    import json
+
+    from mapify_cli.skills_eval.eval_schema import OptimizeResult
+    from mapify_cli.skills_eval.viewer import render_to_path
+
+    out_dir = Path.cwd() / ".map" / "eval-runs" / skill
+
+    if result_path is not None:
+        path = result_path
+    else:
+        # Result names begin with the run timestamp, so the last name in
+        # sorted order is taken as the latest run.
+        candidates = sorted(out_dir.glob("*-optimize.json"))
+        if not candidates:
+            console.print(
+                f"[bold red]Error:[/bold red] no optimize result found under {out_dir}"
+            )
+            raise typer.Exit(2)
+        path = candidates[-1]
+
+    try:
+        res = OptimizeResult.from_dict(json.loads(path.read_text(encoding="utf-8")))
+    except (OSError, ValueError, KeyError, TypeError) as exc:
+        # Report a missing, unreadable or malformed result as exit code 2
+        # instead of a traceback. Only the exception type is echoed: raw
+        # messages such as "[Errno 2] ..." contain brackets that may be
+        # parsed as console markup.
+        console.print(
+            f"[bold red]Error:[/bold red] cannot load optimize result "
+            f"{path} ({type(exc).__name__})"
+        )
+        raise typer.Exit(2) from exc
+
+    html = path.with_suffix(".html")
+    render_to_path(res, html)
+    console.print(f"  report: [cyan]{html}[/cyan]")
+
+    if open_html:
+        _open_best_effort(html)
+
+
 def main():
+    """Entry point: invoke the module-level ``app`` callable."""
     app()