From bec26efd2454358b5b6f651d51e39de7dafe650c Mon Sep 17 00:00:00 2001 From: luiseiman Date: Thu, 4 Jun 2026 09:41:00 -0300 Subject: [PATCH] refactor(v3): retire override audit trail + capture loop, keep escalation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Step #1 validation found 0 overrides across all 12 projects in ~7 weeks (production included; only 4/12 adopted behaviors). The auditable override trail — the v3-vs-hookify differential — has no empirical use, and the v4 capture loop processed an always-empty log. Retired as dead weight. The graduated escalation engine stays: it is exercised and the happy path is "verify", not "override". Removed: - scripts/process-override-log.sh, tests/test-process-override-log.sh - template/hooks + .claude/hooks session-start-process-overrides.sh - .forge/audit/overrides.log (git-tracked) - lib.sh: forge_override_append, FORGE_AUDIT_LOG, audit-log append in try_override - SessionStart wiring in settings.json + template/settings.json.tmpl - audit Dimension B item B3 (override loop); B now 4 items (forge_adoption 0-4) Kept: - forge_pending_block_try_override (override-via-reinvocation, state.json only) - full soft_block escalation Updated both scoring engines (score.sh + audit_all.py), checklist/scoring/SKILL, registry, CLAUDE.md, README, usage-guide, guia-uso. 19 tests green. 
Co-Authored-By: Claude Opus 4.8 --- .../hooks/session-start-process-overrides.sh | 38 --- .claude/settings.json | 5 - .forge/audit/overrides.log | 0 CLAUDE.md | 2 +- README.md | 12 +- audit/checklist.md | 16 +- audit/score.sh | 40 ++-- audit/scoring.md | 8 +- .../tests/scenario_override_reinvocation.sh | 6 +- docs/guia-uso.md | 11 +- docs/usage-guide.md | 11 +- registry/projects.yml | 6 +- scripts/audit_all.py | 19 +- scripts/process-override-log.sh | 184 --------------- scripts/runtime/lib.sh | 40 +--- scripts/runtime/tests/test_pending_block.sh | 4 - skills/audit-project/SKILL.md | 20 +- .../hooks/session-start-process-overrides.sh | 38 --- template/settings.json.tmpl | 5 - tests/test-process-override-log.sh | 219 ------------------ 20 files changed, 61 insertions(+), 623 deletions(-) delete mode 100755 .claude/hooks/session-start-process-overrides.sh delete mode 100644 .forge/audit/overrides.log delete mode 100755 scripts/process-override-log.sh delete mode 100755 template/hooks/session-start-process-overrides.sh delete mode 100755 tests/test-process-override-log.sh diff --git a/.claude/hooks/session-start-process-overrides.sh b/.claude/hooks/session-start-process-overrides.sh deleted file mode 100755 index d781387..0000000 --- a/.claude/hooks/session-start-process-overrides.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env bash -# dotforge v4 — SessionStart hook: process override log -# -# Calls scripts/process-override-log.sh from $DOTFORGE_DIR to capture frequent -# soft_block overrides into practices/inbox/auto-override-*.md. -# -# Non-blocking: always exits 0. Failures are logged to stderr (visible via -# CLAUDE_CODE_DEBUG=hooks) but never prevent session start. 
-# -# Configuration: see scripts/process-override-log.sh - -set -uo pipefail - -# Skip silently if DOTFORGE_DIR is not set (project not bootstrapped via dotforge) -if [[ -z "${DOTFORGE_DIR:-}" ]]; then - exit 0 -fi - -SCRIPT="${DOTFORGE_DIR}/scripts/process-override-log.sh" - -# Skip if dotforge has v3 only (no v4 script yet) -if [[ ! -x "$SCRIPT" ]]; then - exit 0 -fi - -# Run with a short timeout to never block session start. -# Portable: prefer gtimeout (macOS+coreutils) then timeout (Linux), else run unbounded. -if command -v gtimeout >/dev/null 2>&1; then - gtimeout 5 "$SCRIPT" 2>&1 | head -3 1>&2 || true -elif command -v timeout >/dev/null 2>&1; then - timeout 5 "$SCRIPT" 2>&1 | head -3 1>&2 || true -else - # No timeout binary (macOS without coreutils). Script has internal early-exits - # and bounded work (file size). Run unbounded; suppress errors to never block. - "$SCRIPT" 2>&1 | head -3 1>&2 || true -fi - -exit 0 diff --git a/.claude/settings.json b/.claude/settings.json index f22017a..07a853f 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -49,11 +49,6 @@ { "type": "command", "command": ".claude/hooks/session-startup.sh" - }, - { - "type": "command", - "command": ".claude/hooks/session-start-process-overrides.sh", - "timeout": 5 } ] } diff --git a/.forge/audit/overrides.log b/.forge/audit/overrides.log deleted file mode 100644 index e69de29..0000000 diff --git a/CLAUDE.md b/CLAUDE.md index 37cd15b..9d8a57e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -73,7 +73,7 @@ Seven subagent definitions in `agents/`: researcher (read-only exploration), arc ### Audit System -Two-dimension model (v4.x). **Dimension A — Native Health** (`score`, 0-10): 5 obligatory items (0-2) + 10 recommended (0-1), normalized as `obligatory*0.7 + recommended*0.3`. Security-critical items (settings.json, block-destructive hook) cap it at 6.0 if missing. Measures good use of native Claude Code (auto-memory as index, permission cascade, attribution, sandbox, deny rules). 
**Dimension B — dotforge Adoption** (`forge_adoption`, 0-5): behaviors/workflows/override-loop/domain-rules/sync-recency. **Informational — does NOT affect Native Health.** A native-first project scoring B=0 with A=10 is a desirable outcome (see `.claude/rules/domain/native-vs-dotforge-boundary.md`). `audit/checklist.md` + `audit/scoring.md` are the source of truth; registry in `registry/projects.yml` tracks both across managed projects. **Two scoring engines reimplement the checklist independently — `audit/score.sh` (bash, CI gate) and `scripts/audit_all.py` (Python, 12-project re-auditor). Any checklist change must update BOTH plus `audit.yml` and the docs (`README.md`, `docs/usage-guide.md`, `docs/guia-uso.md`); grep all consumers before planning the edit.** +Two-dimension model (v4.x). **Dimension A — Native Health** (`score`, 0-10): 5 obligatory items (0-2) + 10 recommended (0-1), normalized as `obligatory*0.7 + recommended*0.3`. Security-critical items (settings.json, block-destructive hook) cap it at 6.0 if missing. Measures good use of native Claude Code (auto-memory as index, permission cascade, attribution, sandbox, deny rules). **Dimension B — dotforge Adoption** (`forge_adoption`, 0-4): behaviors/workflows/domain-rules/sync-recency. **Informational — does NOT affect Native Health.** A native-first project scoring B=0 with A=10 is a desirable outcome (see `.claude/rules/domain/native-vs-dotforge-boundary.md`). `audit/checklist.md` + `audit/scoring.md` are the source of truth; registry in `registry/projects.yml` tracks both across managed projects. **Two scoring engines reimplement the checklist independently — `audit/score.sh` (bash, CI gate) and `scripts/audit_all.py` (Python, 12-project re-auditor). 
Any checklist change must update BOTH plus `audit.yml` and the docs (`README.md`, `docs/usage-guide.md`, `docs/guia-uso.md`); grep all consumers before planning the edit.** ### Integrations diff --git a/README.md b/README.md index 47f03a3..8d4ed55 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ [![Version](https://img.shields.io/badge/version-4.0.0-blue)](VERSION) [![Last commit](https://img.shields.io/github/last-commit/luiseiman/dotforge)](https://github.com/luiseiman/dotforge/commits/main) -**Behavior governance for [Claude Code](https://docs.anthropic.com/en/docs/claude-code).** Declare runtime policies on tool calls — "search before writing", "no destructive git", "verify before shipping" — and enforce them via compiled `PreToolUse` hooks that share a session-scoped state file. Escalates silently → nudge → warning → soft_block → hard_block, with a permanent override audit trail. +**Behavior governance for [Claude Code](https://docs.anthropic.com/en/docs/claude-code).** Declare runtime policies on tool calls — "search before writing", "no destructive git", "verify before shipping" — and enforce them via compiled `PreToolUse` hooks that share a session-scoped state file. Escalates silently → nudge → warning → soft_block → hard_block. ``` behaviors/no-destructive-git/behavior.yaml # declarative policy @@ -19,7 +19,6 @@ behaviors/no-destructive-git/behavior.yaml # declarative policy .claude/hooks/generated/*.sh # runtime enforcement ↓ (observe) .forge/runtime/state.json # counters, flags, per-session -.forge/audit/overrides.log # override audit (git-tracked) ``` Other tools stop at configuration. dotforge governs behavior — and keeps auditing, syncing, and evolving your `.claude/` setup across every repo you manage. @@ -34,12 +33,11 @@ For people and teams managing more than one Claude Code project. 
## v4.0 — what's new (2026-06-03) -### Override capture loop closes practices↔behaviors (v4.0.0) +### Audit two-dimension model + override loop retired (v4.0.x) -- **`scripts/process-override-log.sh`** — bash script that processes `.forge/audit/overrides.log` and auto-creates `practices/inbox/auto-override-*.md` for behaviors overridden ≥3 times in 30 days. Idempotent. 10/10 tests green. Cost: 0 LLM calls, pure bash. -- **`session-start-process-overrides.sh`** wired in `SessionStart` (template + self-hosting) — auto-captures frequent overrides as practices on every session start. +- **Override capture loop — shipped in v4.0.0, retired after validation.** A portfolio scan found **0 overrides across all 12 projects in ~7 weeks** (production included; only 4/12 adopted behaviors at all). The auditable override trail + capture loop (`process-override-log.sh`, `overrides.log`, SessionStart wiring) were removed as dead weight. The graduated escalation engine (soft_block) — which IS exercised; the happy path is "verify", not "override" — stays. See [`.claude/rules/domain/native-vs-dotforge-boundary.md`](.claude/rules/domain/native-vs-dotforge-boundary.md). - **`scripts/migrate-v3-to-v4.sh`** — safe migration script with mandatory `--dry-run`, atomic backup, `--rollback`. See [`docs/v4/MIGRATION-V3-TO-V4.md`](docs/v4/MIGRATION-V3-TO-V4.md). -- **Audit two-dimension model** — **Native Health** (0-10: native Claude Code usage + security) + **dotforge Adoption** (0-5: informational, does not affect the score). Behaviors / workflows / override-loop moved to the non-penalizing Adoption dimension — native-first projects no longer lose points for skipping dotforge machinery. New Native-Health items: auto-memory hygiene, permission cascade, attribution. +- **Audit two-dimension model** — **Native Health** (0-10: native Claude Code usage + security) + **dotforge Adoption** (0-4: informational, does not affect the score). 
Behaviors / workflows moved to the non-penalizing Adoption dimension — native-first projects no longer lose points for skipping dotforge machinery. New Native-Health items: auto-memory hygiene, permission cascade, attribution. - **`domain/workflow-economics.md`** (new domain rule) — documents v4 PoC cost-quality findings. Decision matrix: when workflow vs skill. Token economy principles. **TL;DR: workflows are 4-25x more expensive than bash skills for recurring work — use only as on-demand escalation, not as default refactor.** - **`workflows/watch.js`** ships as REFERENCE implementation, NOT promoted to `/forge watch` default. The bash skill remains the production tool. @@ -300,7 +298,7 @@ Orchestration follows a decision tree: researcher → architect → implementer `/forge audit` scores your project's Claude Code configuration on a 10-point scale: - **5 obligatory items** (scored 0-2): CLAUDE.md, settings.json, rules with globs, block-destructive hook, build/test commands -- **12 recommended items** (scored 0-1): CLAUDE_ERRORS.md, lint hook, custom commands, memory, agents + orchestration rule, .gitignore secrets, prompt injection scan, auto-mode safety, v3 behaviors enforcement, OS-level sandboxing, **v4 workflow availability**, **v4 override capture loop active** +- **12 recommended items** (scored 0-1): CLAUDE_ERRORS.md, lint hook, custom commands, memory, agents + orchestration rule, .gitignore secrets, prompt injection scan, auto-mode safety, v3 behaviors enforcement, OS-level sandboxing, **v4 workflow availability**, domain rules - **Project tier**: simple/standard/complex adjusts scoring expectations - **Security cap**: missing settings.json or block-destructive hook caps score at 6.0 diff --git a/audit/checklist.md b/audit/checklist.md index ccda677..1714575 100644 --- a/audit/checklist.md +++ b/audit/checklist.md @@ -3,7 +3,7 @@ El audit tiene **dos dimensiones independientes**: - **A — Salud Nativa** (score 0-10): ¿el proyecto usa bien Claude Code nativo 
+ seguridad? Es el score que importa para cualquier proyecto, use o no la maquinaria dotforge. -- **B — Adopción dotforge** (informativo 0-5): ¿cuánto adoptó la gobernanza dotforge? **NO penaliza** la Salud Nativa. Un proyecto native-first puro saca 0/5 acá sin perder un punto en A. +- **B — Adopción dotforge** (informativo 0-4): ¿cuánto adoptó la gobernanza dotforge? **NO penaliza** la Salud Nativa. Un proyecto native-first puro saca 0/4 acá sin perder un punto en A. --- @@ -100,9 +100,9 @@ El audit tiene **dos dimensiones independientes**: --- -# Dimensión B — Adopción dotforge (informativo, 0-5) +# Dimensión B — Adopción dotforge (informativo, 0-4) -**No afecta el score de Salud Nativa.** Mide cuánto adoptó el proyecto la maquinaria de gobernanza dotforge. Reportar como `Adopción: N/5` con label (0=None, 1-2=Partial, 3-4=Most, 5=Full). Sirve para decidir propagación, no para juzgar calidad. +**No afecta el score de Salud Nativa.** Mide cuánto adoptó el proyecto la maquinaria de gobernanza dotforge. Reportar como `Adopción: N/4` con label (0=None, 1-2=Partial, 3=Most, 4=Full). Sirve para decidir propagación, no para juzgar calidad. ### B1. Behaviors v3 compilados y wired - 0: Sin behaviors enforced — declaración en `behaviors/index.yaml` sola NO cuenta @@ -116,19 +116,13 @@ El audit tiene **dos dimensiones independientes**: **Verificación:** `grep -q "export const meta" workflows/*.js`. Señal de gobernanza, no de calidad — los bash skills siguen siendo el workhorse. Ver `docs/v4/SPEC.md`. -### B3. Override capture loop activo (v4) -- 0: `.forge/audit/overrides.log` no rastreado O `session-start-process-overrides.sh` no wired -- 1: Ambos presentes: log existe Y el hook está en `.claude/settings.json` SessionStart - -**Verificación:** `test -f .forge/audit/overrides.log && grep -q "session-start-process-overrides.sh" .claude/settings.json`. Solo significativo si hay behaviors activos. Ver `scripts/process-override-log.sh`. - -### B4. Domain rules +### B3. 
Domain rules - 0: No hay `.claude/rules/domain/` - 1: Al menos un domain rule presente y fresco (`last_verified` <90 días) **Verificación:** Contar archivos en `.claude/rules/domain/`. Reportar cuántos están stale (>90 días). Si hay lógica de negocio pero no domain rules, sugerir `/forge domain extract`. -### B5. Sync recency +### B4. Sync recency - 0: `dotforge_version` del proyecto desfasado respecto a `VERSION` por ≥1 minor, o desconocido - 1: Proyecto sincronizado a la versión actual de dotforge (`dotforge_version` == `VERSION`) diff --git a/audit/score.sh b/audit/score.sh index 02dba41..5d6ab0b 100755 --- a/audit/score.sh +++ b/audit/score.sh @@ -7,7 +7,7 @@ # Two-dimension model (v4.x — see audit/checklist.md + audit/scoring.md): # Dimension A — Native Health: 5 obligatory (0-2) + 10 recommended (0-1). # score = obl*0.7 + rec*0.3, security cap 6.0. The CI gate. -# Dimension B — dotforge Adoption: 5 items (0-1). Informational, 0-5. +# Dimension B — dotforge Adoption: 4 items (0-1). Informational, 0-4. # Does NOT affect Native Health. # Semantic checks (CLAUDE.md quality, rule content) are approximated with heuristics. # Score is indicative — /forge audit provides authoritative semantic evaluation. @@ -44,8 +44,8 @@ cd "$PROJECT_DIR" s1=0; n1=""; s2=0; n2=""; s3=0; n3=""; s4=0; n4=""; s5=0; n5="" s6=0; n6=""; s7=0; n7=""; s8=0; n8=""; s9=0; n9=""; s10=0; n10="" s11=0; n11=""; s12=0; n12=""; s13=0; n13=""; s14=0; n14=""; s15=0; n15="" -# --- Dimension B: scores (b1..b5) and notes (m1..m5) --- -b1=0; m1=""; b2=0; m2=""; b3=0; m3=""; b4=0; m4=""; b5=0; m5="" +# --- Dimension B: scores (b1..b4) and notes (m1..m4) --- +b1=0; m1=""; b2=0; m2=""; b3=0; m3=""; b4=0; m4="" # ───────────────────────────────────────────────────────────────────────────── # DIMENSION A — OBLIGATORIO (each 0-2) @@ -320,24 +320,16 @@ else b2=0; m2="No workflows/ with valid meta block" fi -# B3. 
Override capture loop active (v4) -if [[ -f ".forge/audit/overrides.log" ]] && [[ -f "$SETTINGS" ]] \ - && grep -q "session-start-process-overrides.sh" "$SETTINGS" 2>/dev/null; then - b3=1; m3="overrides.log present and hook wired in SessionStart" -else - b3=0; m3="Override loop not wired (log + SessionStart hook required)" -fi - -# B4. Domain rules present +# B3. Domain rules present DOM=$(ls .claude/rules/domain/*.md 2>/dev/null | wc -l | tr -d ' ') if [[ "${DOM:-0}" -gt 0 ]]; then - b4=1; m4="${DOM} domain rule(s) (freshness checked semantically by /forge audit)" + b3=1; m3="${DOM} domain rule(s) (freshness checked semantically by /forge audit)" else - b4=0; m4="No domain rules in .claude/rules/domain/" + b3=0; m3="No domain rules in .claude/rules/domain/" fi -# B5. Sync recency — not mechanically determinable standalone (needs registry) -b5=0; m5="Sync recency indeterminate standalone — resolved by /forge audit via registry" +# B4. Sync recency — not mechanically determinable standalone (needs registry) +b4=0; m4="Sync recency indeterminate standalone — resolved by /forge audit via registry" # ───────────────────────────────────────────────────────────────────────────── # Calculate scores @@ -352,10 +344,10 @@ if [[ $s2 -eq 0 || $s4 -eq 0 ]]; then NATIVE_HEALTH=$(awk "BEGIN { v=${NATIVE_HEALTH}; printf \"%.2f\", (v > 6.0 ? 
6.0 : v) }") fi -FORGE_ADOPTION=$((b1 + b2 + b3 + b4 + b5)) +FORGE_ADOPTION=$((b1 + b2 + b3 + b4)) if [[ $FORGE_ADOPTION -eq 0 ]]; then ADOPTION_LABEL="None" elif [[ $FORGE_ADOPTION -le 2 ]]; then ADOPTION_LABEL="Partial" -elif [[ $FORGE_ADOPTION -le 4 ]]; then ADOPTION_LABEL="Most" +elif [[ $FORGE_ADOPTION -eq 3 ]]; then ADOPTION_LABEL="Most" else ADOPTION_LABEL="Full" fi @@ -409,9 +401,8 @@ data = { "adoption_items": { "B1_behaviors": {"score": ${b1}, "note": "$(_san "$m1")"}, "B2_workflows": {"score": ${b2}, "note": "$(_san "$m2")"}, - "B3_override_loop": {"score": ${b3}, "note": "$(_san "$m3")"}, - "B4_domain_rules": {"score": ${b4}, "note": "$(_san "$m4")"}, - "B5_sync_recency": {"score": ${b5}, "note": "$(_san "$m5")"} + "B3_domain_rules": {"score": ${b3}, "note": "$(_san "$m3")"}, + "B4_sync_recency": {"score": ${b4}, "note": "$(_san "$m4")"} } } print(json.dumps(data, indent=2)) @@ -421,7 +412,7 @@ else $SECURITY_CAP && CAP_NOTE=" ⚠ security cap applied (settings.json or block-destructive missing)" echo "═══ AUDIT SCORE: $(basename "$PROJECT_DIR") ═══" echo "Native Health: ${NATIVE_HEALTH}/10 (${LEVEL})${CAP_NOTE}" - echo "dotforge Adoption: ${FORGE_ADOPTION}/5 (${ADOPTION_LABEL}) [informational — does not affect Native Health]" + echo "dotforge Adoption: ${FORGE_ADOPTION}/4 (${ADOPTION_LABEL}) [informational — does not affect Native Health]" echo "" echo "═ DIMENSION A — NATIVE HEALTH ═" echo "── OBLIGATORIO (${SCORE_OBL}/10) ──" @@ -446,9 +437,8 @@ else echo "═ DIMENSION B — DOTFORGE ADOPTION (informational) ═" printf " [%s] B1. v3 behaviors %s\n" "$b1" "$m1" printf " [%s] B2. Workflow available %s\n" "$b2" "$m2" - printf " [%s] B3. Override loop %s\n" "$b3" "$m3" - printf " [%s] B4. Domain rules %s\n" "$b4" "$m4" - printf " [%s] B5. Sync recency %s\n" "$b5" "$m5" + printf " [%s] B3. Domain rules %s\n" "$b3" "$m3" + printf " [%s] B4. 
Sync recency %s\n" "$b4" "$m4" fi # CI threshold gate (on Native Health) diff --git a/audit/scoring.md b/audit/scoring.md index ca8c740..c361e9f 100644 --- a/audit/scoring.md +++ b/audit/scoring.md @@ -1,6 +1,6 @@ # Scoring de Auditoría -El audit produce **dos números independientes**: `native_health` (0-10, el score principal) y `forge_adoption` (0-5, informativo). +El audit produce **dos números independientes**: `native_health` (0-10, el score principal) y `forge_adoption` (0-4, informativo). ## Dimensión A — Salud Nativa (score principal) @@ -34,7 +34,7 @@ Si alguno de estos items es **0**, `native_health` tiene un cap máximo de **6.0 ## Dimensión B — Adopción dotforge (informativo) ``` -forge_adoption = sum(items B1-B5) # 0-5 +forge_adoption = sum(items B1-B4) # 0-4 ``` **No entra en `native_health` ni lo modifica.** Es un indicador de cuánta gobernanza dotforge adoptó el proyecto. @@ -43,8 +43,8 @@ forge_adoption = sum(items B1-B5) # 0-5 |----|-------|---------| | 0 | None | Native-first puro. Válido y sin penalización. | | 1-2 | Partial | Adopción parcial de la maquinaria. | -| 3-4 | Most | Adopción amplia. | -| 5 | Full | Gobernanza dotforge completa. | +| 3 | Most | Adopción amplia. | +| 4 | Full | Gobernanza dotforge completa. | Un `forge_adoption: 0` con `native_health: 10` es un resultado **excelente y deseable** bajo el principio native-first (ver `.claude/rules/domain/native-vs-dotforge-boundary.md`). No recomendar adoptar maquinaria dotforge solo para subir B. 
diff --git a/behaviors/search-first/tests/scenario_override_reinvocation.sh b/behaviors/search-first/tests/scenario_override_reinvocation.sh index 7385a8e..a7c88c1 100755 --- a/behaviors/search-first/tests/scenario_override_reinvocation.sh +++ b/behaviors/search-first/tests/scenario_override_reinvocation.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash # Override reinvocation: escalate to soft_block, then the exact same tool_input -# comes back → detected as override, passes silently, records audit in three -# places (state.overrides, .forge/audit/overrides.log, pending_block cleared). +# comes back → detected as override, passes silently, records audit in two +# places (state.overrides, pending_block cleared). set -u . "$(dirname "$0")/_scenario_helpers.sh" trap scenario_cleanup EXIT @@ -48,10 +48,6 @@ ov_counter=$(jq -r --arg sid "$SCENARIO_SESSION_ID" \ '.sessions[$sid].behaviors["search-first"].overrides[0].counter_at_override' "$FORGE_STATE_FILE") assert_eq "5" "$ov_counter" "override captured counter=5" || exit 1 -# .forge/audit/overrides.log has 1 line -audit_lines=$(wc -l < "${FORGE_ROOT}/audit/overrides.log" | tr -d ' ') -assert_eq "1" "$audit_lines" "audit log has one override entry" || exit 1 - # pending_block cleared pending_after=$(jq -r --arg sid "$SCENARIO_SESSION_ID" \ '.sessions[$sid].behaviors["search-first"].pending_block // "null"' "$FORGE_STATE_FILE") diff --git a/docs/guia-uso.md b/docs/guia-uso.md index 02b984b..67c8004 100644 --- a/docs/guia-uso.md +++ b/docs/guia-uso.md @@ -343,7 +343,7 @@ Cada stack aporta: ### Dos dimensiones - **A — Salud Nativa** (0-10): buen uso de Claude Code nativo + seguridad. El score principal. -- **B — Adopción dotforge** (0-5): cuánta gobernanza dotforge adoptó el proyecto. **Informativo — no afecta la Salud Nativa.** Un proyecto native-first con B=0 y A=10 es un resultado deseable. +- **B — Adopción dotforge** (0-4): cuánta gobernanza dotforge adoptó el proyecto. **Informativo — no afecta la Salud Nativa.** Un proyecto native-first con B=0 y A=10 es un resultado deseable. 
### Dimensión A — Salud Nativa (15 items) @@ -372,21 +372,20 @@ Cada stack aporta: | 14 | Comandos custom | Al menos 1 comando relevante | | 15 | Agentes | Instalados + regla de orquestación activa | -### Dimensión B — Adopción dotforge (5 items, informativo) +### Dimensión B — Adopción dotforge (4 items, informativo) | # | Item | Criterio | |---|------|----------| | B1 | Behaviors v3 compilados | Hook compilado en `.claude/hooks/generated/` Y wired en settings.json | | B2 | Workflow availability | `workflows/` con al menos un `.js` con `export const meta` | -| B3 | Override capture loop | `.forge/audit/overrides.log` + `session-start-process-overrides.sh` wired en SessionStart | -| B4 | Domain rules | Al menos un rule en `.claude/rules/domain/` (frescura evaluada semánticamente) | -| B5 | Sync recency | `dotforge_version` del proyecto == `VERSION` actual | +| B3 | Domain rules | Al menos un rule en `.claude/rules/domain/` (frescura evaluada semánticamente) | +| B4 | Sync recency | `dotforge_version` del proyecto == `VERSION` actual | ### Fórmula de scoring ``` native_health = obligatorio × 0.7 + recomendado × 0.3 # 0-10, score principal -forge_adoption = sum(B1..B5) # 0-5, informativo +forge_adoption = sum(B1..B4) # 0-4, informativo ``` - Obligatorios perfectos sin recomendados = **7.0** (Bueno) diff --git a/docs/usage-guide.md b/docs/usage-guide.md index 197a658..14bbddd 100644 --- a/docs/usage-guide.md +++ b/docs/usage-guide.md @@ -395,7 +395,7 @@ Each stack provides: The audit produces two independent numbers: - **A — Native Health** (0-10): good use of native Claude Code + security. The primary score. -- **B — dotforge Adoption** (0-5): how much dotforge governance the project adopted. **Informational — does not affect Native Health.** A native-first project scoring B=0 with A=10 is a desirable outcome. +- **B — dotforge Adoption** (0-4): how much dotforge governance the project adopted. 
**Informational — does not affect Native Health.** A native-first project scoring B=0 with A=10 is a desirable outcome. ### Dimension A — Native Health (15 items) @@ -424,21 +424,20 @@ The audit produces two independent numbers: | 14 | Custom commands | At least 1 relevant command | | 15 | Agents | Installed + active orchestration rule | -### Dimension B — dotforge Adoption (5 items, informational) +### Dimension B — dotforge Adoption (4 items, informational) | # | Item | Criteria | |---|------|----------| | B1 | v3 behaviors compiled | Compiled hook under `.claude/hooks/generated/` AND wired in settings.json | | B2 | Workflow availability | `workflows/` with at least one `.js` containing `export const meta` | -| B3 | Override capture loop | `.forge/audit/overrides.log` + `session-start-process-overrides.sh` wired in SessionStart | -| B4 | Domain rules | At least one rule in `.claude/rules/domain/` (freshness checked semantically) | -| B5 | Sync recency | Project `dotforge_version` == current `VERSION` | +| B3 | Domain rules | At least one rule in `.claude/rules/domain/` (freshness checked semantically) | +| B4 | Sync recency | Project `dotforge_version` == current `VERSION` | ### Scoring formula ``` native_health = required x 0.7 + recommended x 0.3 # 0-10, the primary score -forge_adoption = sum(B1..B5) # 0-5, informational +forge_adoption = sum(B1..B4) # 0-4, informational ``` - Perfect required items without recommended = **7.0** (Good) diff --git a/registry/projects.yml b/registry/projects.yml index 051ef3b..3f8b7fe 100644 --- a/registry/projects.yml +++ b/registry/projects.yml @@ -15,7 +15,7 @@ # score = native_health = obligatorio*0.7 + recomendado*0.3 (max 10.0) # items 1-5 obligatorios (0-2), items 6-15 recomendados (0-1) # security cap 6.0 if item 2 or 4 == 0 -# forge_adoption = sum(items B1-B5), 0-5, INFORMATIONAL — does not affect score +# forge_adoption = sum(items B1-B4), 0-4, INFORMATIONAL — does not affect score # history: array of {date, score, 
adoption, version} — appended, never overwritten # metrics_summary: aggregated from ~/.claude/metrics/{slug}/ via /forge insights # @@ -43,11 +43,11 @@ projects: last_audit: 2026-04-14 dotforge_version: 3.0.4 score: 9.7 - forge_adoption: 5 + forge_adoption: 4 history: - {date: 2026-04-08, score: 10.0, version: 2.9.1} - {date: 2026-04-14, score: 9.7, version: 3.0.4} - notes: "Reference config. native_health 9.7, adoption Full (5/5). Sandbox pending (item 9)." + notes: "Reference config. native_health 9.7, adoption Full (4/4). Sandbox pending (item 9)." - name: cds-dashboard path: /Users/luiseiman/Documents/jira nbch/cds-dashboard diff --git a/scripts/audit_all.py b/scripts/audit_all.py index 1bc1d31..ba39244 100755 --- a/scripts/audit_all.py +++ b/scripts/audit_all.py @@ -4,7 +4,7 @@ Deterministic, script-based alternative to running the /audit-project skill 12 times. Two-dimension model (v4.x): - Native Health (score, 0-10): 5 obligatory + 10 recommended native-usage items. - - dotforge Adoption (forge_adoption, 0-5): informational, does not affect score. + - dotforge Adoption (forge_adoption, 0-4): informational, does not affect score. 
Usage: python3 scripts/audit_all.py [--dry-run] """ @@ -313,18 +313,13 @@ def audit(proj_path: Path, name: str, version: str, prev_version) -> dict: r["adoption"]["B2_workflows"] = 1 if any( "export const meta" in read_text(f, 5000) for f in wf_files) else 0 - # B3: override capture loop - override_log = proj_path / ".forge/audit/overrides.log" - wired_override = bool(s and "session-start-process-overrides.sh" in json.dumps(s)) - r["adoption"]["B3_override_loop"] = 1 if (override_log.exists() and wired_override) else 0 - - # B4: domain rules + # B3: domain rules domain_dir = rules_dir / "domain" domain_rules = list(domain_dir.glob("*.md")) if domain_dir.exists() else [] - r["adoption"]["B4_domain_rules"] = 1 if domain_rules else 0 + r["adoption"]["B3_domain_rules"] = 1 if domain_rules else 0 - # B5: sync recency — project version matches current dotforge VERSION - r["adoption"]["B5_sync_recency"] = 1 if (prev_version and str(prev_version) == version) else 0 + # B4: sync recency — project version matches current dotforge VERSION + r["adoption"]["B4_sync_recency"] = 1 if (prev_version and str(prev_version) == version) else 0 # ── Score calculation ── mand = sum(r["items"][k] for k in ( @@ -340,7 +335,7 @@ def audit(proj_path: Path, name: str, version: str, prev_version) -> dict: label = "None" elif adoption <= 2: label = "Partial" - elif adoption <= 4: + elif adoption == 3: label = "Most" else: label = "Full" @@ -381,7 +376,7 @@ def main() -> int: prev = r.get("prev_score") or 0 delta = r["score"] - prev delta_s = f"{delta:+.2f}" if prev else " new" - adopt = f"{r['forge_adoption']}/5 {r['adoption_label'][:4]}" + adopt = f"{r['forge_adoption']}/4 {r['adoption_label'][:4]}" notes = ", ".join(r["notes"][:2]) if r["notes"] else "" print( f"{r['name']:<20} {r['mand']:>3}/10 {r['rec']:>3}/10 " diff --git a/scripts/process-override-log.sh b/scripts/process-override-log.sh deleted file mode 100755 index 8e95dc4..0000000 --- a/scripts/process-override-log.sh +++ /dev/null @@ 
-1,184 +0,0 @@ -#!/usr/bin/env bash -# dotforge v4 override capture loop -# -# Process .forge/audit/overrides.log → create practices/inbox/auto-override-*.md -# for behavior+tool combinations overridden ≥ MIN_OVERRIDES times in WINDOW_DAYS. -# -# Idempotent: same input produces same output. Already-captured (in inbox/active/ -# evaluating/deprecated) groups are skipped. -# -# Per-project log (.forge/audit/overrides.log under PWD) → dotforge inbox. -# Practices flow up to dotforge for centralized review via /forge update. -# -# Environment variables: -# DOTFORGE_DIR dotforge root (where practices/ lives) — required -# FORGE_ROOT defaults to .forge (per-project log location) -# MIN_OVERRIDES default 3 — minimum count to trigger capture -# WINDOW_DAYS default 30 — only consider overrides in last N days -# -# Exit codes: -# 0 — success (including "nothing to do") -# 1 — config error (DOTFORGE_DIR not set or invalid) -# 2 — log read error - -set -uo pipefail - -# === Config === -FORGE_ROOT="${FORGE_ROOT:-.forge}" -LOG="${FORGE_ROOT}/audit/overrides.log" -MIN_OVERRIDES="${MIN_OVERRIDES:-3}" -WINDOW_DAYS="${WINDOW_DAYS:-30}" -DATE_TODAY="$(date +%Y-%m-%d)" - -# DOTFORGE_DIR required -if [[ -z "${DOTFORGE_DIR:-}" ]]; then - echo "process-override-log: DOTFORGE_DIR not set — cannot locate practices/ dir" >&2 - exit 1 -fi -if [[ ! 
-d "${DOTFORGE_DIR}/practices/inbox" ]]; then - echo "process-override-log: ${DOTFORGE_DIR}/practices/inbox not found" >&2 - exit 1 -fi - -PRACTICES_INBOX="${DOTFORGE_DIR}/practices/inbox" -PRACTICES_EVALUATING="${DOTFORGE_DIR}/practices/evaluating" -PRACTICES_ACTIVE="${DOTFORGE_DIR}/practices/active" -PRACTICES_DEPRECATED="${DOTFORGE_DIR}/practices/deprecated" - -# Project slug: basename of PWD, lowercased, alphanumeric+dash only -PROJECT_SLUG="$(basename "$PWD" | tr '[:upper:]' '[:lower:]' | tr -c 'a-z0-9-' '-' | sed 's/-\+/-/g; s/^-\|-$//g')" -PROJECT_SLUG="${PROJECT_SLUG:-unknown}" - -# === Early exits === - -# Log doesn't exist or empty -if [[ ! -s "$LOG" ]]; then - exit 0 -fi - -# === Compute cutoff timestamp === -# Portable: try GNU date first (Linux), then BSD date (macOS) -cutoff="$(date -d "${WINDOW_DAYS} days ago" -u +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || \ - date -u -v-"${WINDOW_DAYS}"d +%Y-%m-%dT%H:%M:%SZ 2>/dev/null)" -if [[ -z "$cutoff" ]]; then - echo "process-override-log: failed to compute cutoff timestamp" >&2 - exit 2 -fi - -# === Hash helper (portable: md5sum on Linux, md5 -q on macOS) === -_hash() { - printf '%s' "$1" | (md5sum 2>/dev/null || md5 -q 2>/dev/null) | cut -c1-8 -} - -# === Group overrides within window by (behavior_id, tool_name) === -# Format of awk output: \t\t -groups=$(awk -F'|' -v cutoff="$cutoff" ' - $1 >= cutoff { count[$3 "\x1f" $4]++ } - END { - for (k in count) { - n = split(k, parts, "\x1f") - print count[k] "\t" parts[1] "\t" parts[2] - } - } -' "$LOG") - -[[ -z "$groups" ]] && exit 0 - -created_count=0 -skipped_count=0 - -# === Process each group === -while IFS=$'\t' read -r count behavior_id tool_name; do - # Skip if below threshold - if (( count < MIN_OVERRIDES )); then - continue - fi - - # Defensive: skip malformed groups - [[ -n "$behavior_id" && -n "$tool_name" ]] || continue - - # Dedup hash (stable across runs) - key="${PROJECT_SLUG}|${behavior_id}|${tool_name}" - hash="$(_hash "$key")" - - # Normalize tool name 
for filename (lowercase, alphanum+dash) - tool_slug="$(printf '%s' "$tool_name" | tr '[:upper:]' '[:lower:]' | tr -c 'a-z0-9-' '-')" - behavior_slug="$(printf '%s' "$behavior_id" | tr '[:upper:]' '[:lower:]' | tr -c 'a-z0-9-' '-')" - - filename="auto-override-${PROJECT_SLUG}-${behavior_slug}-${tool_slug}-${hash}.md" - - # Skip if already captured (inbox/evaluating/active/deprecated) - already_exists=0 - for dir in "$PRACTICES_INBOX" "$PRACTICES_EVALUATING" "$PRACTICES_ACTIVE" "$PRACTICES_DEPRECATED"; do - if [[ -f "${dir}/${filename}" ]]; then - already_exists=1 - skipped_count=$((skipped_count + 1)) - break - fi - done - (( already_exists )) && continue - - # Sample last 5 raw log lines for this (behavior, tool) within window - samples="$(awk -F'|' -v b="$behavior_id" -v t="$tool_name" -v cutoff="$cutoff" ' - $1 >= cutoff && $3 == b && $4 == t - ' "$LOG" | tail -5)" - - # Write practice file - cat > "${PRACTICES_INBOX}/${filename}" < 0 || skipped_count > 0 )); then - echo "process-override-log: ${created_count} new, ${skipped_count} skipped (already captured) — project=${PROJECT_SLUG}" -fi - -exit 0 diff --git a/scripts/runtime/lib.sh b/scripts/runtime/lib.sh index 327ece5..8a56e93 100644 --- a/scripts/runtime/lib.sh +++ b/scripts/runtime/lib.sh @@ -17,7 +17,6 @@ set -u FORGE_ROOT="${FORGE_ROOT:-.forge}" FORGE_STATE_FILE="${FORGE_ROOT}/runtime/state.json" FORGE_LOCK_DIR="${FORGE_ROOT}/runtime/state.lock" -FORGE_AUDIT_LOG="${FORGE_ROOT}/audit/overrides.log" FORGE_LOCK_TIMEOUT="${FORGE_LOCK_TIMEOUT:-2}" FORGE_SESSION_TTL_SECONDS="${FORGE_SESSION_TTL_SECONDS:-86400}" FORGE_EMPTY_STATE='{"schema_version":"1","sessions":{}}' @@ -67,16 +66,13 @@ _forge_require_jq() { # Returns 0 on success, 1 if jq is missing. forge_init() { _forge_require_jq || return 1 - mkdir -p "${FORGE_ROOT}/runtime" "${FORGE_ROOT}/audit" 2>/dev/null || { + mkdir -p "${FORGE_ROOT}/runtime" 2>/dev/null || { _forge_log "cannot create ${FORGE_ROOT}/ — disk full or permission denied" return 1 } if [ ! 
-f "$FORGE_STATE_FILE" ]; then printf '%s\n' "$FORGE_EMPTY_STATE" > "$FORGE_STATE_FILE" fi - if [ ! -f "$FORGE_AUDIT_LOG" ]; then - : > "$FORGE_AUDIT_LOG" - fi return 0 } @@ -591,11 +587,6 @@ forge_pending_block_try_override() { printf '%s' "$mutated" | _forge_state_write forge_lock_release trap - EXIT INT TERM - - # Append to permanent audit log - printf '%s|%s|%s|%s|%s|%s|%s\n' \ - "$now_iso" "$sid" "$bid" "$tool" "$summary" "$counter_at" "" \ - >> "$FORGE_AUDIT_LOG" return 0 } @@ -613,35 +604,6 @@ _forge_iso_to_epoch() { printf '0' } -# --------------------------------------------------------------------------- -# Override audit (triple-write: state.json + overrides.log) -# --------------------------------------------------------------------------- - -# forge_override_append — record a soft_block override. -# Args: session_id behavior_id tool_name tool_input_summary counter reason -forge_override_append() { - local sid="$1" bid="$2" tool="$3" summary="$4" counter="$5" reason="${6:-}" - local now - now=$(_forge_now_iso8601) - local filter=' - .sessions[$sid].behaviors[$bid].overrides += [{ - "timestamp": $now, - "tool_name": $tool, - "tool_input_summary": $summary, - "counter_at_override": ($counter | tonumber), - "reason": $reason - }] - ' - _forge_run_mutation "$filter" \ - --arg sid "$sid" --arg bid "$bid" --arg now "$now" \ - --arg tool "$tool" --arg summary "$summary" \ - --arg counter "$counter" --arg reason "$reason" || return 1 - # Append to permanent audit log (pipe-delimited). 
- printf '%s|%s|%s|%s|%s|%s|%s\n' \ - "$now" "$sid" "$bid" "$tool" "$summary" "$counter" "$reason" \ - >> "$FORGE_AUDIT_LOG" -} - # --------------------------------------------------------------------------- # Debug / test helpers # --------------------------------------------------------------------------- diff --git a/scripts/runtime/tests/test_pending_block.sh b/scripts/runtime/tests/test_pending_block.sh index 60390f3..f13766f 100755 --- a/scripts/runtime/tests/test_pending_block.sh +++ b/scripts/runtime/tests/test_pending_block.sh @@ -48,10 +48,6 @@ ov_count=$(jq -r --arg sid "$SID" --arg bid "$BID" \ "$FORGE_STATE_FILE") assert_eq "1" "$ov_count" "override recorded in state" || exit 1 -# audit log line present -audit_lines=$(wc -l < "$FORGE_AUDIT_LOG" | tr -d ' ') -assert_eq "1" "$audit_lines" "override recorded in audit log" || exit 1 - # --- try_override with no pending_block → returns 1, no changes --- if forge_pending_block_try_override "$SID" "$BID" "Write" "$h1" "summary"; then printf 'FAIL: try_override should have returned 1 with no pending_block\n' >&2 diff --git a/skills/audit-project/SKILL.md b/skills/audit-project/SKILL.md index 0f27e41..a519b06 100644 --- a/skills/audit-project/SKILL.md +++ b/skills/audit-project/SKILL.md @@ -80,12 +80,11 @@ For each checklist item, verify existence **and quality**: - `simple`: items 14-15 score 0 don't penalize (treated as N/A) - `complex`: items 14-15 become semi-obligatory (each 0-2 instead of 0-1) -### Dimension B — dotforge Adoption (informational, 0-5, does NOT affect native_health) +### Dimension B — dotforge Adoption (informational, 0-4, does NOT affect native_health) - **B1. v3 behaviors compiled** — `.claude/hooks/generated/*.sh` exist AND referenced in settings.json? - **B2. Workflow availability (v4)** — `workflows/` with at least one `.js` containing `export const meta`? -- **B3. 
Override capture loop (v4)** — `.forge/audit/overrides.log` exists AND `session-start-process-overrides.sh` wired in SessionStart? -- **B4. Domain rules** — at least one rule in `.claude/rules/domain/` with `last_verified` <90 days? Report stale count. -- **B5. Sync recency** — project `dotforge_version` == `$DOTFORGE_DIR/VERSION`? +- **B3. Domain rules** — at least one rule in `.claude/rules/domain/` with `last_verified` <90 days? Report stale count. +- **B4. Sync recency** — project `dotforge_version` == `$DOTFORGE_DIR/VERSION`? A project scoring B=0 (native-first) is a valid, non-penalized outcome. Never recommend adopting dotforge machinery just to raise B. @@ -103,8 +102,8 @@ Use weights from `$DOTFORGE_DIR/audit/scoring.md`: **Security cap:** If item 2 (settings.json) or item 4 (block-destructive) is 0, `native_health` max = 6.0. **Dimension B — dotforge Adoption (informational):** -6. `forge_adoption = sum(items B1-B5)` — 0 to 5. Does NOT enter native_health. -7. Label: 0=None, 1-2=Partial, 3-4=Most, 5=Full. +6. `forge_adoption = sum(items B1-B4)` — 0 to 4. Does NOT enter native_health. +7. Label: 0=None, 1-2=Partial, 3=Most, 4=Full. 
## Step 5: Generate report @@ -116,7 +115,7 @@ Detected stack: {{stacks}} Tier: {{simple|standard|complex}} dotforge version: {{version from last bootstrap/sync if detectable}} Native Health: {{X.X}}/10 {{level}} -dotforge Adoption: {{N}}/5 {{None|Partial|Most|Full}} (informational — does not affect Native Health) +dotforge Adoption: {{N}}/4 {{None|Partial|Most|Full}} (informational — does not affect Native Health) ═ DIMENSION A — NATIVE HEALTH ═ @@ -142,9 +141,8 @@ dotforge Adoption: {{N}}/5 {{None|Partial|Most|Full}} (informational — does n ═ DIMENSION B — DOTFORGE ADOPTION ═ (informational) {{✅|—}} B1 v3 behaviors compiled — {{detail: N generated hooks, settings reference yes/no}} {{✅|—}} B2 v4 workflow availability — {{detail: N .js workflows OR "none"}} -{{✅|—}} B3 v4 override loop active — {{detail: hook wired yes/no, log exists yes/no}} -{{✅|—}} B4 domain rules — {{detail: N rules, M stale >90d}} -{{✅|—}} B5 sync recency — {{detail: project version vs current VERSION}} +{{✅|—}} B3 domain rules — {{detail: N rules, M stale >90d}} +{{✅|—}} B4 sync recency — {{detail: project version vs current VERSION}} ── DOMAIN KNOWLEDGE ── Role defined: {{✓ if ## Role exists in CLAUDE.md with content | ✗ otherwise}} @@ -194,7 +192,7 @@ This closes the Audit → Learning synergy: detected gaps feed back into the pra If `$DOTFORGE_DIR/registry/projects.yml` exists, update the project entry: - `score:` with `native_health` (the primary score — preserves trend continuity with prior audits) -- `forge_adoption:` with the dimension-B value (0-5) +- `forge_adoption:` with the dimension-B value (0-4) - `last_audit:` with the current date - `dotforge_version:` with the VERSION version if the project was bootstrapped - `last_sync:` preserve the existing value (do not modify here) diff --git a/template/hooks/session-start-process-overrides.sh b/template/hooks/session-start-process-overrides.sh deleted file mode 100755 index d781387..0000000 --- 
a/template/hooks/session-start-process-overrides.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env bash -# dotforge v4 — SessionStart hook: process override log -# -# Calls scripts/process-override-log.sh from $DOTFORGE_DIR to capture frequent -# soft_block overrides into practices/inbox/auto-override-*.md. -# -# Non-blocking: always exits 0. Failures are logged to stderr (visible via -# CLAUDE_CODE_DEBUG=hooks) but never prevent session start. -# -# Configuration: see scripts/process-override-log.sh - -set -uo pipefail - -# Skip silently if DOTFORGE_DIR is not set (project not bootstrapped via dotforge) -if [[ -z "${DOTFORGE_DIR:-}" ]]; then - exit 0 -fi - -SCRIPT="${DOTFORGE_DIR}/scripts/process-override-log.sh" - -# Skip if dotforge has v3 only (no v4 script yet) -if [[ ! -x "$SCRIPT" ]]; then - exit 0 -fi - -# Run with a short timeout to never block session start. -# Portable: prefer gtimeout (macOS+coreutils) then timeout (Linux), else run unbounded. -if command -v gtimeout >/dev/null 2>&1; then - gtimeout 5 "$SCRIPT" 2>&1 | head -3 1>&2 || true -elif command -v timeout >/dev/null 2>&1; then - timeout 5 "$SCRIPT" 2>&1 | head -3 1>&2 || true -else - # No timeout binary (macOS without coreutils). Script has internal early-exits - # and bounded work (file size). Run unbounded; suppress errors to never block. 
- "$SCRIPT" 2>&1 | head -3 1>&2 || true -fi - -exit 0 diff --git a/template/settings.json.tmpl b/template/settings.json.tmpl index c715b16..2947662 100644 --- a/template/settings.json.tmpl +++ b/template/settings.json.tmpl @@ -84,11 +84,6 @@ "type": "command", "command": ".claude/hooks/session-startup.sh", "timeout": 10 - }, - { - "type": "command", - "command": ".claude/hooks/session-start-process-overrides.sh", - "timeout": 5 } ] } diff --git a/tests/test-process-override-log.sh b/tests/test-process-override-log.sh deleted file mode 100755 index e1c7bc5..0000000 --- a/tests/test-process-override-log.sh +++ /dev/null @@ -1,219 +0,0 @@ -#!/usr/bin/env bash -# Tests for scripts/process-override-log.sh -# Runs in an isolated tmp dir; never touches the real dotforge state. - -set -uo pipefail - -# === Setup === -DOTFORGE_REPO="$(cd "$(dirname "$0")/.." && pwd)" -SCRIPT="${DOTFORGE_REPO}/scripts/process-override-log.sh" - -if [[ ! -x "$SCRIPT" ]]; then - echo "FAIL: ${SCRIPT} not found or not executable" - exit 1 -fi - -pass_count=0 -fail_count=0 - -_pass() { pass_count=$((pass_count + 1)); echo " ✓ $1"; } -_fail() { fail_count=$((fail_count + 1)); echo " ✗ $1"; } - -# === Helper: set up isolated test environment === -_setup_env() { - TMPDIR="$(mktemp -d -t process-override-test.XXXXXX)" - export DOTFORGE_DIR="${TMPDIR}/dotforge" - mkdir -p "${DOTFORGE_DIR}/practices"/{inbox,evaluating,active,deprecated} - PROJECT_DIR="${TMPDIR}/sample-project" - mkdir -p "${PROJECT_DIR}/.forge/audit" - cd "$PROJECT_DIR" - export FORGE_ROOT=".forge" -} - -_cleanup() { - cd / - rm -rf "$TMPDIR" -} - -# === Test 1: empty log → no captures === -echo "Test 1: empty log" -_setup_env -touch "${PROJECT_DIR}/.forge/audit/overrides.log" -"$SCRIPT" >/dev/null 2>&1 -inbox_files="$(ls "${DOTFORGE_DIR}/practices/inbox" 2>/dev/null | wc -l | tr -d ' ')" -if [[ "$inbox_files" == "0" ]]; then - _pass "empty log produces no captures" -else - _fail "empty log produced ${inbox_files} captures (expected 
0)" -fi -_cleanup - -# === Test 2: missing log → no captures, no error === -echo "Test 2: missing log" -_setup_env -"$SCRIPT" >/dev/null 2>&1 -rc=$? -inbox_files="$(ls "${DOTFORGE_DIR}/practices/inbox" 2>/dev/null | wc -l | tr -d ' ')" -if [[ "$rc" == "0" && "$inbox_files" == "0" ]]; then - _pass "missing log exits 0 with no captures" -else - _fail "missing log: rc=${rc}, inbox=${inbox_files}" -fi -_cleanup - -# === Test 3: 2 overrides (below MIN_OVERRIDES=3) → no capture === -echo "Test 3: below threshold" -_setup_env -LOG="${PROJECT_DIR}/.forge/audit/overrides.log" -{ - echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)|abc12345|verify-before-done|Bash|cmd=git push|3|" - echo "$(date -u +%Y-%m-%dT%H:%M:%SZ)|abc12345|verify-before-done|Bash|cmd=git push|4|hotfix" -} > "$LOG" -"$SCRIPT" >/dev/null 2>&1 -inbox_files="$(ls "${DOTFORGE_DIR}/practices/inbox" 2>/dev/null | wc -l | tr -d ' ')" -if [[ "$inbox_files" == "0" ]]; then - _pass "2 overrides below MIN_OVERRIDES=3 produce no capture" -else - _fail "expected 0 captures with 2 overrides; got ${inbox_files}" -fi -_cleanup - -# === Test 4: 3 overrides → 1 capture === -echo "Test 4: at threshold" -_setup_env -LOG="${PROJECT_DIR}/.forge/audit/overrides.log" -ts="$(date -u +%Y-%m-%dT%H:%M:%SZ)" -{ - echo "${ts}|abc12345|verify-before-done|Bash|cmd=git push|3|" - echo "${ts}|abc12345|verify-before-done|Bash|cmd=git commit|4|" - echo "${ts}|abc12345|verify-before-done|Bash|cmd=git tag|5|hotfix" -} > "$LOG" -"$SCRIPT" >/dev/null 2>&1 -inbox_files="$(ls "${DOTFORGE_DIR}/practices/inbox" 2>/dev/null | wc -l | tr -d ' ')" -if [[ "$inbox_files" == "1" ]]; then - _pass "3 overrides at MIN_OVERRIDES produce 1 capture" - # Verify frontmatter has key fields - pf="$(ls "${DOTFORGE_DIR}/practices/inbox"/*.md | head -1)" - if grep -q "source_type: auto-override" "$pf" && \ - grep -q "behavior_id: verify-before-done" "$pf" && \ - grep -q "tool_name: Bash" "$pf" && \ - grep -q "count: 3" "$pf"; then - _pass " frontmatter has expected fields" - else 
- _fail " frontmatter missing key fields" - fi -else - _fail "expected 1 capture; got ${inbox_files}" -fi -_cleanup - -# === Test 5: re-run on same log → idempotent (no duplicate) === -echo "Test 5: idempotent re-run" -_setup_env -LOG="${PROJECT_DIR}/.forge/audit/overrides.log" -ts="$(date -u +%Y-%m-%dT%H:%M:%SZ)" -{ - echo "${ts}|abc12345|verify-before-done|Bash|cmd=git push|3|" - echo "${ts}|abc12345|verify-before-done|Bash|cmd=git commit|4|" - echo "${ts}|abc12345|verify-before-done|Bash|cmd=git tag|5|hotfix" -} > "$LOG" -"$SCRIPT" >/dev/null 2>&1 -"$SCRIPT" >/dev/null 2>&1 # re-run -inbox_files="$(ls "${DOTFORGE_DIR}/practices/inbox" 2>/dev/null | wc -l | tr -d ' ')" -if [[ "$inbox_files" == "1" ]]; then - _pass "re-run on same log does not duplicate (idempotent)" -else - _fail "re-run produced ${inbox_files} files (expected 1)" -fi -_cleanup - -# === Test 6: skips if practice exists in active/ === -echo "Test 6: skip if already in active/" -_setup_env -LOG="${PROJECT_DIR}/.forge/audit/overrides.log" -ts="$(date -u +%Y-%m-%dT%H:%M:%SZ)" -{ - echo "${ts}|abc12345|search-first|Edit|file_path=/src/utils.ts|3|" - echo "${ts}|abc12345|search-first|Edit|file_path=/src/main.ts|4|" - echo "${ts}|abc12345|search-first|Edit|file_path=/src/app.ts|5|" -} > "$LOG" -# Pre-populate active/ with the would-be filename -# Mirror script's normalization exactly: lowercase, non [a-z0-9-] → '-', collapse, trim -project_slug="$(basename "$PROJECT_DIR" | tr '[:upper:]' '[:lower:]' | tr -c 'a-z0-9-' '-' | sed 's/-\+/-/g; s/^-\|-$//g')" -key="${project_slug}|search-first|Edit" -hash="$(printf '%s' "$key" | (md5sum 2>/dev/null || md5 -q) | cut -c1-8)" -fn="auto-override-${project_slug}-search-first-edit-${hash}.md" -touch "${DOTFORGE_DIR}/practices/active/${fn}" -"$SCRIPT" >/dev/null 2>&1 -inbox_files="$(ls "${DOTFORGE_DIR}/practices/inbox" 2>/dev/null | wc -l | tr -d ' ')" -if [[ "$inbox_files" == "0" ]]; then - _pass "skip when same practice exists in active/" -else - _fail "expected 0 
captures (already in active); got ${inbox_files}" -fi -_cleanup - -# === Test 7: multiple distinct (behavior, tool) groups === -echo "Test 7: multiple groups" -_setup_env -LOG="${PROJECT_DIR}/.forge/audit/overrides.log" -ts="$(date -u +%Y-%m-%dT%H:%M:%SZ)" -{ - # Group A: verify-before-done on Bash (3 entries) - echo "${ts}|abc12345|verify-before-done|Bash|cmd=git push|3|" - echo "${ts}|abc12345|verify-before-done|Bash|cmd=git commit|4|" - echo "${ts}|abc12345|verify-before-done|Bash|cmd=git tag|5|" - # Group B: search-first on Edit (3 entries) - echo "${ts}|abc12345|search-first|Edit|file=/src/a.ts|3|" - echo "${ts}|abc12345|search-first|Edit|file=/src/b.ts|4|" - echo "${ts}|abc12345|search-first|Edit|file=/src/c.ts|5|" - # Group C: respect-todo-state on TaskUpdate (2 entries, below threshold) - echo "${ts}|abc12345|respect-todo-state|TaskUpdate|task=foo|2|" - echo "${ts}|abc12345|respect-todo-state|TaskUpdate|task=bar|3|" -} > "$LOG" -"$SCRIPT" >/dev/null 2>&1 -inbox_files="$(ls "${DOTFORGE_DIR}/practices/inbox" 2>/dev/null | wc -l | tr -d ' ')" -if [[ "$inbox_files" == "2" ]]; then - _pass "2 groups above threshold produce 2 captures (third group below threshold)" -else - _fail "expected 2 captures (2 above threshold, 1 below); got ${inbox_files}" -fi -_cleanup - -# === Test 8: missing DOTFORGE_DIR exits 1 === -echo "Test 8: missing DOTFORGE_DIR" -_setup_env -unset DOTFORGE_DIR -"$SCRIPT" >/dev/null 2>&1 -rc=$? 
-if [[ "$rc" == "1" ]]; then - _pass "missing DOTFORGE_DIR exits 1" -else - _fail "expected exit 1 with missing DOTFORGE_DIR; got rc=${rc}" -fi -_cleanup - -# === Test 9: out-of-window entries ignored === -echo "Test 9: out-of-window entries" -_setup_env -LOG="${PROJECT_DIR}/.forge/audit/overrides.log" -old_ts="2020-01-01T00:00:00Z" -{ - # All very old — outside default 30-day window - echo "${old_ts}|abc12345|verify-before-done|Bash|cmd=git push|3|" - echo "${old_ts}|abc12345|verify-before-done|Bash|cmd=git commit|4|" - echo "${old_ts}|abc12345|verify-before-done|Bash|cmd=git tag|5|" -} > "$LOG" -"$SCRIPT" >/dev/null 2>&1 -inbox_files="$(ls "${DOTFORGE_DIR}/practices/inbox" 2>/dev/null | wc -l | tr -d ' ')" -if [[ "$inbox_files" == "0" ]]; then - _pass "out-of-window entries are ignored" -else - _fail "expected 0 captures for out-of-window entries; got ${inbox_files}" -fi -_cleanup - -# === Final report === -echo "" -echo "═══ test-process-override-log: ${pass_count} passed, ${fail_count} failed ═══" -(( fail_count == 0 ))