diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 4196762..829b90b 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -71,6 +71,28 @@ "conventional-commits" ], "category": "productivity" + }, + { + "name": "harness", + "source": "./plugins/harness", + "description": "Transparent harness for long-running Claude Code agents. Automatically classifies tasks, decomposes complex work, tracks progress, verifies output, bridges sessions, and adapts to any project type. Zero configuration required.", + "version": "1.0.0", + "author": { + "name": "Emeric" + }, + "homepage": "https://github.com/moukrea/claude-code-plugins", + "repository": "https://github.com/moukrea/claude-code-plugins", + "license": "MIT", + "keywords": [ + "harness", + "orchestration", + "agents", + "task-management", + "verification", + "session-bridging", + "progress-tracking" + ], + "category": "productivity" } ] } diff --git a/plugins/harness/.claude-plugin/plugin.json b/plugins/harness/.claude-plugin/plugin.json new file mode 100644 index 0000000..1287ebf --- /dev/null +++ b/plugins/harness/.claude-plugin/plugin.json @@ -0,0 +1,20 @@ +{ + "name": "harness", + "version": "1.0.0", + "description": "Transparent harness for long-running Claude Code agents. Automatically classifies tasks, decomposes complex work, tracks progress, verifies output, bridges sessions, and adapts to any project type. 
Zero configuration required.", + "author": { + "name": "Emeric" + }, + "license": "MIT", + "keywords": [ + "harness", + "orchestration", + "agents", + "task-management", + "verification", + "session-bridging", + "progress-tracking" + ], + "hooks": "./hooks/hooks.json", + "skills": "./skills/" +} diff --git a/plugins/harness/CLAUDE.md b/plugins/harness/CLAUDE.md new file mode 100644 index 0000000..dd8a1ca --- /dev/null +++ b/plugins/harness/CLAUDE.md @@ -0,0 +1,113 @@ +# harness — Behavioral Rules + +These rules apply whenever the harness plugin is installed. They govern how you +respond to harness hook context and work with the agent orchestration system. + +## How hooks work + +Hook messages appear as `[HARNESS]` prefixed context. Act on them according to +the rules below. Hooks provide facts and recommendations — you decide the response. + +## Rule 1: Respect complexity classification + +When the harness classifies a prompt, adapt your approach: + +| Classification | Recommended approach | +|----------------|---------------------| +| `simple` | Proceed directly — no planning overhead | +| `medium` | Explore the codebase, plan, then implement with verification | +| `complex` | Decompose into tasks, use subagents for parallel work, verify each piece | +| `massive` | Ingest spec fully, decompose into granular tasks, use agent teams or batch processing | + +If the harness flags a prompt as vague, clarify requirements before starting work. +Use the `/harness:requirements-interview` skill for structured gathering. + +## Rule 2: Never remove tests + +The harness blocks removal of existing test cases. Tests must only be added or +modified, never removed. If you need to change test behavior, update the test +assertions — do not delete the test function. + +This applies to any file matching test patterns (`*.test.*`, `*.spec.*`, +`*_test.*`, `test_*.*`, `__tests__/`, etc.). 
+ +## Rule 3: Fix failures before stopping + +The harness runs the project's test suite and linter before allowing a session +to stop. If tests or lint fail, you must fix the issues before the session can end. + +Similarly, when a task is marked complete, the harness verifies tests pass. Do not +mark tasks as complete until verification succeeds. + +## Rule 4: No incomplete implementations + +When operating as a spawned implementer agent, the harness checks for incomplete +markers before allowing you to stop. Do not leave `TODO`, `FIXME`, `not yet +implemented`, `placeholder`, or `stub` markers in your output. + +## Rule 5: Use detected project commands + +At session start, the harness detects the project type and its test, lint, and +build commands. Use these detected commands for verification rather than guessing. +The harness injects them as context — reference them when running checks. + +## Rule 6: Act on failure patterns + +The harness tracks consecutive bash failures. When you see a failure pattern +warning (3+ similar failures), change your approach rather than retrying the +same command. Consider: +- Reading error output carefully +- Trying an alternative approach +- Using the `/harness:recovery` skill + +## Rule 7: Lock file caution + +When the harness warns about lock file edits, do not edit lock files directly. +These are generated files — use the appropriate package manager command instead +(`npm install`, `cargo update`, etc.). 
+ +## Rule 8: Agent team coordination + +When working with agent teams: +- The **architect** decomposes work and creates tasks +- The **implementer** works in isolated worktrees on single tasks +- The **tester** writes and runs tests +- The **reviewer** checks code quality +- The **integrator** merges parallel work and resolves conflicts +- The **debugger** diagnoses and fixes errors +- The **monitor** watches long-running processes +- The **researcher** explores the codebase deeply +- The **ui-verifier** validates visual implementations + +Each agent has specific tools and constraints. Respect agent boundaries — do not +ask an implementer to do architecture work or a researcher to write code. + +## Rule 9: Post-edit verification + +The harness runs per-file type checking after edits (TypeScript, Python, Go, +JavaScript). If verification errors appear in the additional context, fix them +before moving on. Do not accumulate type errors across multiple edits. + +## Rule 10: Compaction awareness + +Before context compaction, the harness snapshots git state. After compaction, +it reports any changes detected. If you see post-compaction context about branch +changes, new commits, or modified file count changes, re-orient yourself before +continuing work. 
+ +## Skill reference + +| Skill | When to use | +|-------|-------------| +| `/harness:init` | Initialize harness for a new project (run once per project) | +| `/harness:session-bridge` | Resume work from a previous session | +| `/harness:task-analyze` | Analyze task complexity before starting | +| `/harness:task-decompose` | Break complex work into parallel tasks | +| `/harness:requirements-interview` | Gather requirements for vague tasks | +| `/harness:spec-ingest` | Ingest a specification document | +| `/harness:verify-work` | Comprehensive verification of completed work | +| `/harness:progress-report` | Generate a progress summary | +| `/harness:recovery` | Recover from stuck or failing state | +| `/harness:reflect` | Reflect on improvements after milestones | +| `/harness:deployment-monitor` | Monitor a deployment or CI pipeline | +| `/harness:logs` | Review harness hook activity logs | diff --git a/plugins/harness/agents/architect.md b/plugins/harness/agents/architect.md new file mode 100644 index 0000000..85bd632 --- /dev/null +++ b/plugins/harness/agents/architect.md @@ -0,0 +1,34 @@ +--- +name: architect +description: Architecture analysis and design specialist. Analyzes system architecture, + designs solutions, decomposes complex tasks, and plans implementation strategies. + Use for planning phases and when major design decisions are needed. +tools: Read, Grep, Glob, Bash, LSP, Write, Edit, TaskCreate, TaskUpdate, TaskList +model: opus +memory: project +--- + +You are a senior software architect. + +ultrathink about every design decision. + +When analyzing architecture: +1. Map the system's component structure and dependencies +2. Identify patterns and anti-patterns +3. Note technical debt and potential issues +4. Understand data flow and state management + +When planning implementation: +1. Design for minimal disruption to existing architecture +2. Prefer composition over inheritance +3. Keep changes incremental and independently verifiable +4. 
Consider backward compatibility +5. Plan for testability from the start + +When decomposing tasks: +1. Each unit should be independently implementable and verifiable +2. Minimize file overlap between units (prevents merge conflicts) +3. Order by dependency -- no unit should depend on incomplete work +4. Target 5-6 units per implementing agent + +Update your memory with architectural decisions and their rationale. diff --git a/plugins/harness/agents/debugger.md b/plugins/harness/agents/debugger.md new file mode 100644 index 0000000..6f5be9b --- /dev/null +++ b/plugins/harness/agents/debugger.md @@ -0,0 +1,30 @@ +--- +name: debugger +description: Debugging and root cause analysis specialist. Analyzes errors, traces + execution paths, identifies root causes, and implements minimal fixes. Use + proactively when encountering errors or test failures. +tools: Read, Edit, Bash, Grep, Glob, LSP +model: inherit +memory: project +maxTurns: 40 +--- + +You are an expert debugger specializing in root cause analysis. + +Process: +1. Reproduce the error (run the failing command/test) +2. Read the full error output and stack trace +3. Trace backward from the error to find the root cause +4. Use LSP to check type information and find references +5. Form a hypothesis about the cause +6. Implement the MINIMAL fix (don't refactor surrounding code) +7. Verify the fix resolves the error +8. Run regression tests to ensure nothing else broke + +Do NOT: +- Suppress errors without fixing the cause +- Add broad try/catch blocks as fixes +- Refactor surrounding code while debugging +- Make speculative changes to multiple files + +Update your memory with failure patterns and their fixes. diff --git a/plugins/harness/agents/implementer.md b/plugins/harness/agents/implementer.md new file mode 100644 index 0000000..f7fb5f0 --- /dev/null +++ b/plugins/harness/agents/implementer.md @@ -0,0 +1,30 @@ +--- +name: implementer +description: Focused implementation worker for well-defined task units. 
Implements + one feature at a time, following existing patterns and conventions. Use when a + task unit has clear acceptance criteria and file boundaries. +tools: Read, Write, Edit, Bash, Grep, Glob, LSP +model: inherit +isolation: worktree +maxTurns: 50 +hooks: + PostToolUse: + - matcher: "Write|Edit" + hooks: + - type: command + command: '"${CLAUDE_PLUGIN_ROOT}/scripts/post-edit.sh"' + async: true +--- + +You are a focused implementer. Work on EXACTLY ONE task unit at a time. + +Rules: +1. Read similar existing code first to understand patterns +2. Follow existing patterns in the codebase +3. Write tests alongside implementation (not after) +4. Run tests after every logical change +5. Git commit with descriptive messages after each passing change +6. NEVER mark as done without running the full verification command +7. If you encounter a blocker, document it clearly and stop + +ultrathink when designing the implementation approach. diff --git a/plugins/harness/agents/integrator.md b/plugins/harness/agents/integrator.md new file mode 100644 index 0000000..ef032cd --- /dev/null +++ b/plugins/harness/agents/integrator.md @@ -0,0 +1,24 @@ +--- +name: integrator +description: Integration and merge specialist. Resolves merge conflicts, validates + integration between components, runs integration tests, and ensures all parallel + work units work together. +tools: Read, Write, Edit, Bash, Grep, Glob +model: inherit +maxTurns: 30 +--- + +You are an integration specialist. + +When merging parallel work: +1. Review each branch's changes to understand intent +2. Resolve conflicts by understanding both sides, not just picking one +3. Run the full test suite after merging +4. If tests fail, identify which merge caused the failure +5. Fix integration issues (mismatched interfaces, conflicting state) + +When validating integration: +1. Check that all APIs have consistent request/response formats +2. Verify shared state is accessed consistently +3. 
Ensure error handling is consistent across components +4. Run integration tests that span multiple components diff --git a/plugins/harness/agents/monitor.md b/plugins/harness/agents/monitor.md new file mode 100644 index 0000000..b2888ec --- /dev/null +++ b/plugins/harness/agents/monitor.md @@ -0,0 +1,24 @@ +--- +name: monitor +description: Monitors external state like CI pipelines, PR reviews, deployments, + and build status. Use to babysit long-running processes and report state changes. +tools: Bash, Read, Grep, WebFetch, CronCreate, CronList, CronDelete +model: haiku +background: true +maxTurns: 20 +--- + +You monitor external processes and report status changes. + +When asked to monitor something: +1. Determine what to check and how (gh pr view, gh run list, curl, etc.) +2. Run an initial check and record the state +3. Set up a recurring check using CronCreate (default: every 5 minutes) +4. Report ONLY on STATE CHANGES (don't repeat "still running") +5. Alert immediately on: + - Failure (CI failed, deploy crashed, PR rejected) + - Success (CI passed, deploy healthy, PR approved) + - State transitions (pending -> running -> completed) +6. Clean up the cron job when monitoring is complete (CronDelete) + +Keep reports concise: one line per state change. diff --git a/plugins/harness/agents/researcher.md b/plugins/harness/agents/researcher.md new file mode 100644 index 0000000..bdc78ae --- /dev/null +++ b/plugins/harness/agents/researcher.md @@ -0,0 +1,28 @@ +--- +name: researcher +description: Deep codebase exploration and analysis specialist. Use proactively when + understanding existing code, architecture, patterns, and conventions before making + changes. Returns comprehensive but concise findings. +tools: Read, Grep, Glob, Bash, LSP, ListMcpResourcesTool, ReadMcpResourceTool +model: sonnet +memory: project +background: true +maxTurns: 30 +--- + +You are a deep codebase researcher. Your findings persist in your agent memory +for future reference. 
+ +When researching: +1. Start broad (Glob for structure), narrow progressively (Grep for patterns, Read for details) +2. Use LSP for type information, definitions, and references when available +3. Check MCP resources for external data when relevant +4. Return CONCISE summaries (max 2000 tokens) -- the caller has limited context +5. Update your agent memory with patterns, conventions, and gotchas you discover + +Output format: +- Finding: [one-line summary] +- Evidence: [file:line references] +- Implication: [what this means for the task] + +Do NOT dump entire file contents. Summarize with specific references. diff --git a/plugins/harness/agents/reviewer.md b/plugins/harness/agents/reviewer.md new file mode 100644 index 0000000..1052a39 --- /dev/null +++ b/plugins/harness/agents/reviewer.md @@ -0,0 +1,29 @@ +--- +name: reviewer +description: Expert code review specialist. Reviews code for quality, security, + performance, and consistency with codebase conventions. Use proactively after + writing or modifying code. +tools: Read, Grep, Glob, Bash, LSP +model: sonnet +memory: user +maxTurns: 20 +--- + +You are a senior code reviewer. + +Review checklist: +- Code clarity and readability +- Security vulnerabilities (injection, XSS, auth flaws, exposed secrets) +- Performance considerations (N+1 queries, unnecessary allocations, missing indexes) +- Error handling completeness +- Test coverage adequacy +- Convention consistency with existing codebase +- Edge cases and boundary conditions + +Provide feedback organized by severity: +1. Critical (must fix before merge) +2. Warning (should fix) +3. Suggestion (consider improving) + +Include specific file:line references and suggested fixes. +Update your memory with patterns you frequently flag. 
diff --git a/plugins/harness/agents/tester.md b/plugins/harness/agents/tester.md new file mode 100644 index 0000000..0304656 --- /dev/null +++ b/plugins/harness/agents/tester.md @@ -0,0 +1,25 @@ +--- +name: tester +description: Test writing and verification specialist. Writes comprehensive tests, + runs test suites, analyzes failures, and validates acceptance criteria. +tools: Read, Write, Edit, Bash, Grep, Glob, LSP +model: inherit +maxTurns: 40 +--- + +You are a test specialist. + +When writing tests: +1. Examine existing test files for patterns, frameworks, and conventions +2. Write tests that verify BEHAVIOR, not implementation details +3. Cover happy path, error cases, edge cases, and boundary conditions +4. Use descriptive test names that explain what is being verified +5. Mock only external dependencies, not internal modules + +When verifying acceptance criteria: +1. Map each criterion to a specific test or manual verification +2. Run ALL relevant tests, not just the new ones +3. Report pass/fail for each criterion specifically +4. If a criterion can't be automatically verified, explain what manual check is needed + +Never skip the regression check: run the full test suite, not just new tests. diff --git a/plugins/harness/agents/ui-verifier.md b/plugins/harness/agents/ui-verifier.md new file mode 100644 index 0000000..b0d960f --- /dev/null +++ b/plugins/harness/agents/ui-verifier.md @@ -0,0 +1,24 @@ +--- +name: ui-verifier +description: Verifies UI implementations visually using Chrome browser automation. + Use after implementing any frontend changes to verify they match design specs + and function correctly. +tools: Read, Grep, Glob, Bash, mcp__claude-in-chrome__* +model: sonnet +--- + +You verify UI implementations by opening them in Chrome and comparing to design specs. + +Process: +1. Start the dev server if not running (check if port is already in use first) +2. Navigate to the relevant page +3. Take screenshots of the implementation +4. 
If a design spec/mockup was provided, compare visually +5. Check the browser console for errors +6. Test interactive elements (clicks, form inputs, navigation) +7. Test responsive behavior if applicable +8. Record a GIF if changes involve animation or multi-step flows +9. Report findings with specific visual differences + +If Chrome is not connected, report that visual verification was skipped +and suggest running with --chrome flag. diff --git a/plugins/harness/hooks/hooks.json b/plugins/harness/hooks/hooks.json new file mode 100644 index 0000000..75cc182 --- /dev/null +++ b/plugins/harness/hooks/hooks.json @@ -0,0 +1,172 @@ +{ + "hooks": { + "SessionStart": [ + { + "matcher": "startup", + "hooks": [ + { + "type": "command", + "command": "\"${CLAUDE_PLUGIN_ROOT}/scripts/session-start.sh\"", + "statusMessage": "Initializing harness..." + } + ] + } + ], + + "UserPromptSubmit": [ + { + "hooks": [ + { + "type": "command", + "command": "\"${CLAUDE_PLUGIN_ROOT}/scripts/classify-prompt.sh\"", + "timeout": 5 + } + ] + } + ], + + "PreToolUse": [ + { + "matcher": "Write|Edit", + "hooks": [ + { + "type": "command", + "command": "\"${CLAUDE_PLUGIN_ROOT}/scripts/guard-writes.sh\"", + "statusMessage": "Checking write safety..." + } + ] + } + ], + + "PostToolUse": [ + { + "matcher": "Write|Edit", + "hooks": [ + { + "type": "command", + "command": "\"${CLAUDE_PLUGIN_ROOT}/scripts/post-edit.sh\"", + "statusMessage": "Verifying..." 
+ } + ] + } + ], + + "PostToolUseFailure": [ + { + "matcher": "Bash", + "hooks": [ + { + "type": "command", + "command": "\"${CLAUDE_PLUGIN_ROOT}/scripts/failure-tracker.sh\"" + } + ] + } + ], + + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "\"${CLAUDE_PLUGIN_ROOT}/scripts/stop-gate.sh\"" + } + ] + } + ], + + "SubagentStop": [ + { + "matcher": "implementer", + "hooks": [ + { + "type": "command", + "command": "\"${CLAUDE_PLUGIN_ROOT}/scripts/verify-implementation.sh\"" + } + ] + } + ], + + "TaskCompleted": [ + { + "hooks": [ + { + "type": "command", + "command": "\"${CLAUDE_PLUGIN_ROOT}/scripts/task-gate.sh\"" + } + ] + } + ], + + "TeammateIdle": [ + { + "hooks": [ + { + "type": "command", + "command": "\"${CLAUDE_PLUGIN_ROOT}/scripts/teammate-check.sh\"" + } + ] + } + ], + + "PreCompact": [ + { + "matcher": "auto", + "hooks": [ + { + "type": "command", + "command": "\"${CLAUDE_PLUGIN_ROOT}/scripts/pre-compact.sh\"" + } + ] + } + ], + + "PostCompact": [ + { + "hooks": [ + { + "type": "command", + "command": "\"${CLAUDE_PLUGIN_ROOT}/scripts/post-compact.sh\"" + } + ] + } + ], + + "Notification": [ + { + "matcher": "idle_prompt", + "hooks": [ + { + "type": "command", + "command": "\"${CLAUDE_PLUGIN_ROOT}/scripts/notify.sh\"", + "async": true + } + ] + } + ], + + "InstructionsLoaded": [ + { + "hooks": [ + { + "type": "command", + "command": "\"${CLAUDE_PLUGIN_ROOT}/scripts/log-instructions.sh\"", + "async": true + } + ] + } + ], + + "ConfigChange": [ + { + "matcher": "project_settings|local_settings", + "hooks": [ + { + "type": "command", + "command": "\"${CLAUDE_PLUGIN_ROOT}/scripts/config-changed.sh\"", + "async": true + } + ] + } + ] + } +} diff --git a/plugins/harness/rule-templates/database.md b/plugins/harness/rule-templates/database.md new file mode 100644 index 0000000..69da51f --- /dev/null +++ b/plugins/harness/rule-templates/database.md @@ -0,0 +1,14 @@ +--- +paths: + - "**/migrations/**" + - "**/*.sql" + - "**/schema*" + - 
"**/models/**" +--- + +# Database Rules +- NEVER drop tables or columns without explicit user confirmation +- Always create reversible migrations (both up and down) +- Test migrations on a copy before applying to production +- Review index impact on large tables +- Check for N+1 query patterns in ORM code diff --git a/plugins/harness/rule-templates/frontend.md b/plugins/harness/rule-templates/frontend.md new file mode 100644 index 0000000..7af8f6a --- /dev/null +++ b/plugins/harness/rule-templates/frontend.md @@ -0,0 +1,17 @@ +--- +paths: + - "**/components/**" + - "**/*.tsx" + - "**/*.jsx" + - "**/*.vue" + - "**/*.svelte" + - "**/*.css" + - "**/*.scss" +--- + +# Frontend Rules +- Check visual output in browser after UI changes (use Chrome if available) +- Test responsive behavior at common breakpoints +- Check browser console for errors and warnings +- Follow existing component patterns (check similar components first) +- Ensure accessibility basics (alt text, ARIA labels, keyboard navigation) diff --git a/plugins/harness/rule-templates/go.md b/plugins/harness/rule-templates/go.md new file mode 100644 index 0000000..dcffa0c --- /dev/null +++ b/plugins/harness/rule-templates/go.md @@ -0,0 +1,11 @@ +--- +paths: + - "**/*.go" +--- + +# Go Rules +- Run `go vet` after changes for static analysis +- Run `golangci-lint run` if available +- Use `go test ./...` for full test suite +- Follow existing error handling patterns (check error returns) +- Use `gofmt` formatting (usually enforced by editor) diff --git a/plugins/harness/rule-templates/infrastructure.md b/plugins/harness/rule-templates/infrastructure.md new file mode 100644 index 0000000..be64e36 --- /dev/null +++ b/plugins/harness/rule-templates/infrastructure.md @@ -0,0 +1,15 @@ +--- +paths: + - "**/terraform/**" + - "**/*.tf" + - "**/docker-compose*" + - "**/Dockerfile*" + - "**/k8s/**" +--- + +# Infrastructure Rules +- ALWAYS run `terraform plan` before `terraform apply` +- NEVER put secrets or credentials in 
infrastructure files +- Use variables for all configurable values +- Validate Dockerfiles with `hadolint` if available +- For Kubernetes, validate manifests with `kubectl --dry-run=client` diff --git a/plugins/harness/rule-templates/python.md b/plugins/harness/rule-templates/python.md new file mode 100644 index 0000000..fe85fe8 --- /dev/null +++ b/plugins/harness/rule-templates/python.md @@ -0,0 +1,11 @@ +--- +paths: + - "**/*.py" +--- + +# Python Rules +- After major changes, run type checking with `mypy` (if configured) +- Use `ruff` for linting (if available), otherwise `flake8` +- Follow existing code style (check for black/ruff formatting) +- Use type hints for function signatures +- Prefer pathlib over os.path for file operations diff --git a/plugins/harness/rule-templates/rust.md b/plugins/harness/rule-templates/rust.md new file mode 100644 index 0000000..f5e182a --- /dev/null +++ b/plugins/harness/rule-templates/rust.md @@ -0,0 +1,11 @@ +--- +paths: + - "**/*.rs" +--- + +# Rust Rules +- Run `cargo check` for fast compilation verification +- Run `cargo clippy` for lint warnings +- Use `cargo test` for testing +- Follow existing error handling patterns (Result vs unwrap vs expect) +- LSP (rust-analyzer) provides excellent diagnostics -- check after edits diff --git a/plugins/harness/rule-templates/testing.md b/plugins/harness/rule-templates/testing.md new file mode 100644 index 0000000..1f57677 --- /dev/null +++ b/plugins/harness/rule-templates/testing.md @@ -0,0 +1,16 @@ +--- +paths: + - "**/*.test.*" + - "**/*.spec.*" + - "**/test/**" + - "**/tests/**" + - "**/__tests__/**" +--- + +# Testing Rules +- NEVER remove or weaken existing tests +- Tests should verify behavior, not implementation details +- Each test should be independent (no shared mutable state) +- Follow existing test patterns and conventions +- Include descriptive test names that explain the expected behavior +- Cover: happy path, error cases, edge cases, boundary conditions diff --git 
a/plugins/harness/rule-templates/typescript.md b/plugins/harness/rule-templates/typescript.md new file mode 100644 index 0000000..7167120 --- /dev/null +++ b/plugins/harness/rule-templates/typescript.md @@ -0,0 +1,12 @@ +--- +paths: + - "**/*.ts" + - "**/*.tsx" +--- + +# TypeScript Rules +- After major changes, verify types with `npx tsc --noEmit` +- Use LSP for go-to-definition and find-references when exploring code +- Prefer `interface` over `type` for public API definitions +- Replace `any` types with proper types when touching affected code +- Use strict null checks -- check for `undefined` and `null` explicitly diff --git a/plugins/harness/scripts/classify-prompt.sh b/plugins/harness/scripts/classify-prompt.sh new file mode 100755 index 0000000..e9ef8d1 --- /dev/null +++ b/plugins/harness/scripts/classify-prompt.sh @@ -0,0 +1,82 @@ +#!/usr/bin/env bash +set -euo pipefail +source "$(dirname "$0")/lib/log.sh" 2>/dev/null || true +harness_start_timer || true + +# UserPromptSubmit hook -- fast heuristic classification of prompt complexity +# Performance target: < 200ms + +# Read stdin +INPUT=$(cat) || exit 0 + +# Require jq +command -v jq >/dev/null 2>&1 || exit 0 + +# Extract user prompt; exit silently on missing/bad JSON +PROMPT=$(echo "$INPUT" | jq -r '.prompt // empty' 2>/dev/null) || exit 0 +[[ -n "$PROMPT" ]] || exit 0 + +# Word count +WORD_COUNT=$(echo "$PROMPT" | wc -w | tr -d ' ') + +# Check for indicators (case-insensitive) +PROMPT_LOWER=$(echo "$PROMPT" | tr '[:upper:]' '[:lower:]') + +has_pattern() { + grep -qE "$1" <<<"$PROMPT_LOWER" 2>/dev/null # here-string, not a pipe: with pipefail, grep -q exiting early on a large prompt would SIGPIPE the writer and report a false "no match" +} + +# File references count (paths like src/foo.ts, ./bar.py, etc.) 
+FILE_REFS=$(echo "$PROMPT" | grep -oE '[a-zA-Z0-9_./-]*\/[a-zA-Z0-9_./-]+\.[a-zA-Z]{1,6}' 2>/dev/null | wc -l | tr -d ' ') || FILE_REFS=0 + +# Classify complexity +COMPLEXITY="simple" +EFFORT="low" +NEEDS_INTERVIEW=false + +if [[ "$WORD_COUNT" -gt 500 ]] || has_pattern '(requirements|specification|prd|acceptance criteria)'; then + COMPLEXITY="massive" + EFFORT="max" +elif [[ "$WORD_COUNT" -gt 150 ]] || { [[ "$FILE_REFS" -gt 5 ]] && has_pattern '(migration|cross-cutting|refactor.*all|across)'; }; then + COMPLEXITY="complex" + EFFORT="high" +elif [[ "$WORD_COUNT" -gt 50 ]] || { [[ "$WORD_COUNT" -gt 15 ]] && has_pattern '(add.*feature|new feature|refactor|implement|create.*new|build.*new|design.*system|migrate|redesign)'; }; then + COMPLEXITY="medium" + EFFORT="medium" +else + # Simple: short prompts about fixes, typos, small changes + COMPLEXITY="simple" + EFFORT="low" +fi + +# Vague prompt detection for interview recommendation +if [[ "$COMPLEXITY" != "simple" ]]; then + if has_pattern '(make it better|improve|something|somehow|not sure|maybe)' || \ + { [[ "$WORD_COUNT" -lt 15 ]] && [[ "$COMPLEXITY" != "simple" ]]; }; then + NEEDS_INTERVIEW=true + fi +fi + +# Build recommendation +RECOMMEND="" +case "$COMPLEXITY" in + simple) RECOMMEND="Proceed directly with implementation and verify." ;; + medium) RECOMMEND="Consider: explore the codebase first, then plan, then implement with verification." ;; + complex) RECOMMEND="Consider: decompose into tasks, use subagents for parallel work, verify each piece." ;; + massive) RECOMMEND="Consider: ingest spec fully, decompose into granular tasks, use agent teams or batch processing." ;; +esac + +if [[ "$NEEDS_INTERVIEW" == "true" ]]; then + RECOMMEND="The prompt is vague for this complexity level -- clarify requirements first. 
$RECOMMEND" +fi + +# Simple tasks: no context injection (zero overhead) +if [[ "$COMPLEXITY" == "simple" ]]; then + harness_log "classify-prompt" "simple" || true + exit 0 +fi + +CONTEXT="[HARNESS] Task classified as $COMPLEXITY. Recommended effort: $EFFORT. $RECOMMEND" + +harness_log "classify-prompt" "$COMPLEXITY" || true +jq -nc --arg ctx "$CONTEXT" '{"hookSpecificOutput": {"hookEventName": "UserPromptSubmit", "additionalContext": $ctx}}' diff --git a/plugins/harness/scripts/config-changed.sh b/plugins/harness/scripts/config-changed.sh new file mode 100755 index 0000000..bbf6fee --- /dev/null +++ b/plugins/harness/scripts/config-changed.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +set -euo pipefail +source "$(dirname "$0")/lib/log.sh" 2>/dev/null || true +harness_start_timer 2>/dev/null || true + +# ConfigChange hook (matcher: project_settings|local_settings, async) -- monitor config changes +# Performance target: < 10ms + +# Early exit if debug mode is not enabled +[[ "${HARNESS_DEBUG:-}" == "1" ]] || { cat >/dev/null; harness_log "config-changed" "skip" || true; exit 0; } + +# Read stdin +INPUT=$(cat) || exit 0 + +# Require jq +command -v jq >/dev/null 2>&1 || exit 0 + +# Extract fields +SOURCE=$(echo "$INPUT" | jq -r '.source // "unknown"' 2>/dev/null) || exit 0 +FILE_PATH=$(echo "$INPUT" | jq -r '.file_path // "unknown"' 2>/dev/null) || exit 0 + +# Append to debug log +LOG_DIR="${HOME}/.claude" +mkdir -p "$LOG_DIR" 2>/dev/null || exit 0 +echo "[$(date -Iseconds)] ConfigChange: source=$SOURCE file=$FILE_PATH" \ + >> "${LOG_DIR}/harness-debug.log" 2>/dev/null || true + +harness_log "config-changed" "logged" || true +exit 0 diff --git a/plugins/harness/scripts/failure-tracker.sh b/plugins/harness/scripts/failure-tracker.sh new file mode 100755 index 0000000..128501b --- /dev/null +++ b/plugins/harness/scripts/failure-tracker.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +set -euo pipefail +source "$(dirname "$0")/lib/log.sh" 2>/dev/null || true +harness_start_timer || true + +# PostToolUseFailure hook (matcher: Bash) -- track failure patterns +# 
Performance target: < 50ms + +# Read stdin +INPUT=$(cat) || exit 0 + +# Require jq +command -v jq >/dev/null 2>&1 || exit 0 + +# Extract error and command +ERROR=$(echo "$INPUT" | jq -r '.error // empty' 2>/dev/null) || exit 0 +COMMAND=$(echo "$INPUT" | jq -r '.tool_input.command // empty' 2>/dev/null) || exit 0 + +[[ -n "$ERROR" ]] || exit 0 + +SESSION_ID=$(echo "$INPUT" | jq -r '.session_id // empty' 2>/dev/null | tr -cd 'A-Za-z0-9_-') || true; SESSION_ID="${SESSION_ID:-${CLAUDE_SESSION_ID:-default}}" # key the log by the session id from hook stdin; a per-process fallback would start a new log on every invocation +LOG_FILE="/tmp/harness-failures-${SESSION_ID}.log" + +# Create a simple error signature for pattern matching (first line, key words) +ERROR_SIG=$(echo "$ERROR" | head -1 | sed 's/[0-9]//g' | tr -s ' ' | cut -c1-80) + +# Log rotation: if log exceeds 100 lines, truncate to last 50 +if [[ -f "$LOG_FILE" ]]; then + LINE_COUNT=$(wc -l < "$LOG_FILE" 2>/dev/null) || LINE_COUNT=0 + if [[ "$LINE_COUNT" -gt 100 ]]; then + tail -50 "$LOG_FILE" > "${LOG_FILE}.tmp" 2>/dev/null && mv "${LOG_FILE}.tmp" "$LOG_FILE" 2>/dev/null || true + fi +fi + +# Append failure record +echo "$(date +%s)|$ERROR_SIG|$COMMAND" >> "$LOG_FILE" 2>/dev/null || exit 0 + +# Count similar failures among the last 10 records; a blank signature is skipped because it would match every record +SIMILAR_COUNT=0 +if [[ -n "${ERROR_SIG//[[:space:]]/}" && -f "$LOG_FILE" ]]; then + SIMILAR_COUNT=$(tail -10 "$LOG_FILE" | grep -cF -e "$ERROR_SIG" 2>/dev/null) || true +fi + +if [[ "$SIMILAR_COUNT" -ge 3 ]]; then + harness_log "failure-tracker" "warn" "3+ similar failures: $ERROR_SIG" || true + jq -nc '{"hookSpecificOutput": {"hookEventName": "PostToolUseFailure", "additionalContext": "[HARNESS] The same type of error has occurred 3+ times. 
Consider: (1) checking if the command/tool is installed, (2) reviewing the approach, (3) using /harness:recovery skill."}}' +else + harness_log "failure-tracker" "track" "count=$SIMILAR_COUNT sig=$ERROR_SIG" || true +fi + +exit 0 diff --git a/plugins/harness/scripts/guard-writes.sh b/plugins/harness/scripts/guard-writes.sh new file mode 100755 index 0000000..87c837f --- /dev/null +++ b/plugins/harness/scripts/guard-writes.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +set -euo pipefail +source "$(dirname "$0")/lib/log.sh" 2>/dev/null || true +harness_start_timer || true + +# PreToolUse hook (matcher: Write|Edit) -- prevent destructive writes to critical files +# Performance target: < 100ms + +# Read stdin +INPUT=$(cat) || exit 0 + +# Require jq +command -v jq >/dev/null 2>&1 || exit 0 + +# Extract file path; exit silently on bad JSON +FILE_PATH=$(echo "$INPUT" | jq -r '.tool_input.file_path // empty' 2>/dev/null) || exit 0 +[[ -n "$FILE_PATH" ]] || exit 0 + +BASENAME=$(basename "$FILE_PATH") + +# --- Lock file warning (non-blocking) --- +case "$BASENAME" in + package-lock.json|yarn.lock|pnpm-lock.yaml|Cargo.lock) + # Warn via additionalContext but do not block + harness_log "guard-writes" "warn" "lock file: $BASENAME" + jq -nc '{"additionalContext": "[HARNESS] Warning: editing a lock file directly. 
This is usually generated automatically."}' + exit 0 + ;; +esac + +# --- Test file protection --- +IS_TEST_FILE=false +case "$BASENAME" in + *.test.*|*.spec.*|*_test.*|test_*.*|*_spec.*|*Test.*|*Tests.*) IS_TEST_FILE=true ;; +esac + +# Also check path patterns +case "$FILE_PATH" in + */__tests__/*|*/test/*|*/tests/*) IS_TEST_FILE=true ;; +esac + +if [[ "$IS_TEST_FILE" == "true" ]]; then + OLD_STRING=$(echo "$INPUT" | jq -r '.tool_input.old_string // empty' 2>/dev/null) || true + NEW_STRING=$(echo "$INPUT" | jq -r '.tool_input.new_string // empty' 2>/dev/null) || true + + # Only check Edit tool (has old_string); Write tool full rewrites are harder to judge + if [[ -n "$OLD_STRING" ]]; then + # Here-strings instead of `echo | cmd`: echo would swallow a leading -n/-e, and + # under pipefail an early-exiting grep -q can SIGPIPE echo on very large input, + # which would leave the flags below false and let a removal through. + OLD_LINES=$(wc -l <<<"$OLD_STRING") + NEW_LINES=$(wc -l <<<"$NEW_STRING") + + # Markers of a test definition across JS/TS, Python, Rust and Go + TEST_CONSTRUCT_RE='(test\(|it\(|describe\(|def test_|fn test_|func Test|#\[test\])' + + # Check if old_string contains test constructs that new_string doesn't + OLD_HAS_TESTS=false + grep -qE "$TEST_CONSTRUCT_RE" <<<"$OLD_STRING" 2>/dev/null && OLD_HAS_TESTS=true + + NEW_HAS_TESTS=false + if [[ -n "$NEW_STRING" ]]; then + grep -qE "$TEST_CONSTRUCT_RE" <<<"$NEW_STRING" 2>/dev/null && NEW_HAS_TESTS=true + fi + + # Block if old has tests and new doesn't (test removal) + if [[ "$OLD_HAS_TESTS" == "true" && "$NEW_HAS_TESTS" == "false" ]]; then + harness_log "guard-writes" "block" "test removal in $BASENAME" + echo "Blocked: Cannot remove test cases. Tests must only be added or modified, never removed." >&2 + exit 2 + fi + + # Block if significant line reduction in test file with test constructs + if [[ "$OLD_HAS_TESTS" == "true" && "$OLD_LINES" -gt 5 && "$NEW_LINES" -lt $(( OLD_LINES / 2 )) ]]; then + harness_log "guard-writes" "block" "significant line reduction in $BASENAME" + echo "Blocked: Cannot remove test cases. Tests must only be added or modified, never removed."
>&2 + exit 2 + fi + fi +fi + +harness_log "guard-writes" "pass" +exit 0 diff --git a/plugins/harness/scripts/lib/detect.sh b/plugins/harness/scripts/lib/detect.sh new file mode 100644 index 0000000..dbea2d5 --- /dev/null +++ b/plugins/harness/scripts/lib/detect.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash +# Shared project-detection helpers for harness hooks +# Source this at the top of each hook script: +# source "$(dirname "$0")/lib/detect.sh" 2>/dev/null || true +# +# Functions: +# detect_project_type # "Node.js (TypeScript)", "Rust", "Go", "Python", etc. +# detect_test_cmd # "npm test", "cargo test", "go test ./...", etc. +# detect_lint_cmd # "npm run lint", "cargo clippy", etc. +# detect_build_cmd # "npm run build", "cargo build", "go build ./...", etc. +# +# Each function prints a command string to stdout and returns 0. +# If nothing is detected, it prints nothing and still returns 0. + +detect_project_type() { + if [[ -f "package.json" ]]; then + if [[ -f "tsconfig.json" ]]; then + echo "Node.js (TypeScript)" + else + echo "Node.js" + fi + return + elif [[ -f "Cargo.toml" ]]; then + echo "Rust"; return + elif [[ -f "go.mod" ]]; then + echo "Go"; return + elif [[ -f "pyproject.toml" || -f "setup.py" ]]; then + echo "Python"; return + elif [[ -f "Makefile" ]]; then + echo "Make-based"; return + fi + return 0 +} + +detect_test_cmd() { + if [[ -f "package.json" ]] && jq -e '.scripts.test' package.json >/dev/null 2>&1; then + echo "npm test"; return + elif [[ -f "Cargo.toml" ]]; then + echo "cargo test"; return + elif [[ -f "go.mod" ]]; then + echo "go test ./..."; return + elif [[ -f "pyproject.toml" || -f "setup.py" ]]; then + echo "pytest"; return + elif [[ -f "Makefile" ]] && grep -q '^test:' Makefile 2>/dev/null; then + echo "make test"; return + fi + return 0 +} + +detect_lint_cmd() { + if [[ -f "package.json" ]] && jq -e '.scripts.lint' package.json >/dev/null 2>&1; then + echo "npm run lint"; return + elif [[ -f "Cargo.toml" ]] && command -v cargo >/dev/null 
2>&1; then + echo "cargo clippy"; return + elif [[ -f "go.mod" ]] && command -v golangci-lint >/dev/null 2>&1; then + echo "golangci-lint run"; return + elif [[ -f "pyproject.toml" || -f "setup.py" ]] && command -v ruff >/dev/null 2>&1; then + echo "ruff check ."; return + fi + return 0 +} + +detect_build_cmd() { + if [[ -f "package.json" ]] && jq -e '.scripts.build' package.json >/dev/null 2>&1; then + echo "npm run build"; return + elif [[ -f "Cargo.toml" ]]; then + echo "cargo build"; return + elif [[ -f "go.mod" ]]; then + echo "go build ./..."; return + elif [[ -f "Makefile" ]] && grep -q '^build:' Makefile 2>/dev/null; then + echo "make build"; return + fi + return 0 +} diff --git a/plugins/harness/scripts/lib/log.sh b/plugins/harness/scripts/lib/log.sh new file mode 100644 index 0000000..051e460 --- /dev/null +++ b/plugins/harness/scripts/lib/log.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Shared logging helper for harness hooks +# Source this at the top of each hook script: +# source "$(dirname "$0")/lib/log.sh" 2>/dev/null || true +# +# Usage: +# harness_start_timer # call once at script start +# harness_log <hook> <decision> [detail] # call before each exit +# +# Writes JSONL to .harness/logs/harness.log +# ALL failures are silent -- logging must never break hooks + +_HARNESS_LOG_DIR="${HARNESS_LOG_DIR:-.harness/logs}" +_HARNESS_LOG_FILE="${_HARNESS_LOG_DIR}/harness.log" +_HARNESS_START_MS="" + +harness_start_timer() { + _HARNESS_START_MS=$(date +%s%3N 2>/dev/null) || _HARNESS_START_MS="" + # On macOS, %3N becomes literal "3N" -- detect and fallback + [[ "${_HARNESS_START_MS:-}" == *N* ]] && _HARNESS_START_MS="$(date +%s)000" + return 0 +} + +harness_log() { + { + local hook="${1:-unknown}" decision="${2:-ok}" detail="${3:-}" + local ts duration_ms="" + + ts=$(date -u +"%Y-%m-%dT%H:%M:%SZ" 2>/dev/null) || ts="unknown" + + if [[ -n "$_HARNESS_START_MS" ]]; then + local end_ms + end_ms=$(date +%s%3N 2>/dev/null) || end_ms="" + [[ "${end_ms:-}" == *N* ]] && end_ms="$(date
+%s)000" + [[ -n "$end_ms" ]] && duration_ms=$(( end_ms - _HARNESS_START_MS )) + fi + + mkdir -p "$_HARNESS_LOG_DIR" 2>/dev/null || return 0 + + if command -v jq >/dev/null 2>&1; then + jq -nc \ + --arg ts "$ts" \ + --arg hook "$hook" \ + --arg decision "$decision" \ + --arg detail "$detail" \ + --argjson dur "${duration_ms:-null}" \ + '{timestamp:$ts, hook:$hook, decision:$decision} + + (if $detail != "" then {detail:$detail} else {} end) + + (if $dur then {duration_ms:$dur} else {} end)' \ + >> "$_HARNESS_LOG_FILE" 2>/dev/null + else + # Fallback without jq + local line="{\"timestamp\":\"${ts}\",\"hook\":\"${hook}\",\"decision\":\"${decision}\"" + [[ -n "$detail" ]] && line="${line},\"detail\":$(printf '%s' "$detail" | sed 's/\\/\\\\/g;s/"/\\"/g;s/\t/\\t/g' | sed 's/.*/"&"/')" + [[ -n "$duration_ms" ]] && line="${line},\"duration_ms\":${duration_ms}" + echo "${line}}" >> "$_HARNESS_LOG_FILE" 2>/dev/null + fi + } 2>/dev/null || true +} diff --git a/plugins/harness/scripts/log-instructions.sh b/plugins/harness/scripts/log-instructions.sh new file mode 100755 index 0000000..21c0b9b --- /dev/null +++ b/plugins/harness/scripts/log-instructions.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -euo pipefail +source "$(dirname "$0")/lib/log.sh" 2>/dev/null || true +harness_start_timer 2>/dev/null || true + +# InstructionsLoaded hook (async) -- debug logging for rule/instruction loading +# Performance target: < 10ms + +# Early exit if debug mode is not enabled +[[ "${HARNESS_DEBUG:-}" == "1" ]] || { cat >/dev/null; harness_log "log-instructions" "skip"; exit 0; } + +# Read stdin +INPUT=$(cat) || exit 0 + +# Require jq +command -v jq >/dev/null 2>&1 || exit 0 + +# Extract fields +FILE_PATH=$(echo "$INPUT" | jq -r '.file_path // "unknown"' 2>/dev/null) || exit 0 +MEMORY_TYPE=$(echo "$INPUT" | jq -r '.memory_type // "unknown"' 2>/dev/null) || exit 0 +LOAD_REASON=$(echo "$INPUT" | jq -r '.load_reason // "unknown"' 2>/dev/null) || exit 0 + +# Append to debug log 
+LOG_DIR="${HOME}/.claude" +mkdir -p "$LOG_DIR" 2>/dev/null || exit 0 +echo "[$(date -Iseconds)] InstructionsLoaded: file=$FILE_PATH type=$MEMORY_TYPE reason=$LOAD_REASON" \ + >> "${LOG_DIR}/harness-debug.log" 2>/dev/null || true + +harness_log "log-instructions" "logged" +exit 0 diff --git a/plugins/harness/scripts/notify.sh b/plugins/harness/scripts/notify.sh new file mode 100755 index 0000000..08582c5 --- /dev/null +++ b/plugins/harness/scripts/notify.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +set -euo pipefail +source "$(dirname "$0")/lib/log.sh" 2>/dev/null || true +harness_start_timer 2>/dev/null || true + +# Notification hook (matcher: idle_prompt, async) -- desktop notification +# Performance target: < 200ms + +# Read stdin (consume it) +cat > /dev/null 2>&1 || true + +# Detect OS and send notification +_NOTIFY_PLATFORM="" +case "$(uname -s)" in + Darwin) + osascript -e 'display notification "Claude is ready" with title "Harness"' 2>/dev/null || true + _NOTIFY_PLATFORM="macOS" + ;; + Linux) + if command -v notify-send >/dev/null 2>&1; then + notify-send 'Harness' 'Claude is ready for your next instruction' 2>/dev/null || true + fi + _NOTIFY_PLATFORM="Linux" + ;; +esac + +harness_log "notify" "sent" "$_NOTIFY_PLATFORM" +exit 0 diff --git a/plugins/harness/scripts/post-compact.sh b/plugins/harness/scripts/post-compact.sh new file mode 100755 index 0000000..5066c54 --- /dev/null +++ b/plugins/harness/scripts/post-compact.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +set -euo pipefail +source "$(dirname "$0")/lib/log.sh" 2>/dev/null || true +harness_start_timer 2>/dev/null || true + +# PostCompact hook (async) -- inject context if state changed during compaction +# Performance target: < 100ms + +# Read stdin (consume it) +cat > /dev/null 2>&1 || true + +# Require jq +command -v jq >/dev/null 2>&1 || exit 0 + +SESSION_ID="${CLAUDE_SESSION_ID:-$$}" +STATE_FILE="/tmp/harness-precompact-${SESSION_ID}.json" + +# If no pre-compact state was saved, nothing to compare +[[ 
-f "$STATE_FILE" ]] || exit 0 + +# Read saved state +SAVED_BRANCH=$(jq -r '.branch // empty' "$STATE_FILE" 2>/dev/null) || exit 0 +SAVED_COMMITS=$(jq -r '.commits[0] // empty' "$STATE_FILE" 2>/dev/null) || exit 0 +SAVED_MODIFIED=$(jq -r '.modified_count // 0' "$STATE_FILE" 2>/dev/null) || exit 0 + +# Get current state +if ! command -v git >/dev/null 2>&1 || ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then + exit 0 +fi + +CURRENT_BRANCH=$(git branch --show-current 2>/dev/null || echo "detached") +CURRENT_HEAD=$(git rev-parse HEAD 2>/dev/null || echo "") +CURRENT_MODIFIED=$(git status --porcelain 2>/dev/null | wc -l | tr -d ' ') + +# Compare states +changes=() + +if [[ "$CURRENT_BRANCH" != "$SAVED_BRANCH" && -n "$SAVED_BRANCH" ]]; then + changes+=("Branch changed: $SAVED_BRANCH -> $CURRENT_BRANCH.") +fi + +if [[ -n "$CURRENT_HEAD" && -n "$SAVED_COMMITS" && "$CURRENT_HEAD" != "$SAVED_COMMITS" ]]; then + NEW_COMMITS=$(git rev-list "${SAVED_COMMITS}..HEAD" --count 2>/dev/null) || NEW_COMMITS=0 + if [[ "$NEW_COMMITS" -gt 0 ]]; then + changes+=("$NEW_COMMITS new commit(s) since compaction.") + fi +fi + +if [[ "$CURRENT_MODIFIED" != "$SAVED_MODIFIED" ]]; then + changes+=("Uncommitted files: $SAVED_MODIFIED -> $CURRENT_MODIFIED.") +fi + +# Clean up state file +rm -f "$STATE_FILE" 2>/dev/null || true + +# Output only if something changed +if [[ ${#changes[@]} -gt 0 ]]; then + CONTEXT="[HARNESS] State changed during compaction: $(IFS=' '; echo "${changes[*]}")" + jq -nc --arg ctx "$CONTEXT" '{"additionalContext": $ctx}' + harness_log "post-compact" "inject" +else + harness_log "post-compact" "silent" +fi + +exit 0 diff --git a/plugins/harness/scripts/post-edit.sh b/plugins/harness/scripts/post-edit.sh new file mode 100755 index 0000000..e165f47 --- /dev/null +++ b/plugins/harness/scripts/post-edit.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash +set -euo pipefail +source "$(dirname "$0")/lib/log.sh" 2>/dev/null || true +harness_start_timer || true + +# PostToolUse hook 
(matcher: Write|Edit, async) -- fast verification on edited files +# Performance target: < 3s + +# Read stdin +INPUT=$(cat) || exit 0 + +# Require jq +command -v jq >/dev/null 2>&1 || exit 0 + +# Extract file path +FILE_PATH=$(echo "$INPUT" | jq -r '.tool_input.file_path // empty' 2>/dev/null) || exit 0 +[[ -n "$FILE_PATH" ]] || exit 0 + +ERRORS="" + +case "$FILE_PATH" in + *.ts|*.tsx) + if [[ -f "tsconfig.json" ]] && command -v npx >/dev/null 2>&1; then + ERRORS=$(npx tsc --noEmit --pretty 2>&1 | head -20) || true + # tsc outputs nothing on success + if [[ -z "$ERRORS" ]]; then harness_log "post-edit" "clean"; exit 0; fi + else + harness_log "post-edit" "skip" "no tsc for $FILE_PATH" + exit 0 + fi + ;; + *.py) + if command -v ruff >/dev/null 2>&1; then + ERRORS=$(ruff check "$FILE_PATH" 2>&1 | head -10) || true + if [[ -z "$ERRORS" ]]; then harness_log "post-edit" "clean"; exit 0; fi + elif command -v python3 >/dev/null 2>&1; then + ERRORS=$(python3 -m py_compile "$FILE_PATH" 2>&1) || true + if [[ -z "$ERRORS" ]]; then harness_log "post-edit" "clean"; exit 0; fi + elif command -v python >/dev/null 2>&1; then + ERRORS=$(python -m py_compile "$FILE_PATH" 2>&1) || true + if [[ -z "$ERRORS" ]]; then harness_log "post-edit" "clean"; exit 0; fi + else + harness_log "post-edit" "skip" "no python checker for $FILE_PATH" + exit 0 + fi + ;; + *.rs) + # Skip: cargo check is slow, LSP handles it + harness_log "post-edit" "skip" "rust files use LSP" + exit 0 + ;; + *.go) + if command -v go >/dev/null 2>&1; then + ERRORS=$(go vet "$FILE_PATH" 2>&1 | head -10) || true + if [[ -z "$ERRORS" ]]; then harness_log "post-edit" "clean"; exit 0; fi + else + harness_log "post-edit" "skip" "no go for $FILE_PATH" + exit 0 + fi + ;; + *.js|*.jsx) + if command -v npx >/dev/null 2>&1; then + # Check for eslint config + HAS_ESLINT=false + for cfg in .eslintrc .eslintrc.js .eslintrc.json .eslintrc.yml .eslintrc.yaml eslint.config.js eslint.config.mjs eslint.config.cjs eslint.config.ts; do + [[ 
-f "$cfg" ]] && HAS_ESLINT=true && break + done + # Also check package.json for eslintConfig + if [[ "$HAS_ESLINT" == "false" && -f "package.json" ]]; then + jq -e '.eslintConfig' package.json >/dev/null 2>&1 && HAS_ESLINT=true + fi + if [[ "$HAS_ESLINT" == "true" ]]; then + ERRORS=$(npx eslint "$FILE_PATH" --no-warn-ignored 2>&1 | head -10) || true + if [[ -z "$ERRORS" ]]; then harness_log "post-edit" "clean"; exit 0; fi + else + harness_log "post-edit" "skip" "no eslint config for $FILE_PATH" + exit 0 + fi + else + harness_log "post-edit" "skip" "no npx for $FILE_PATH" + exit 0 + fi + ;; + *) + # Unsupported file type + harness_log "post-edit" "skip" "unsupported file type: $FILE_PATH" + exit 0 + ;; +esac + +# If we got here, there are errors +if [[ -n "$ERRORS" ]]; then + BASENAME=$(basename "$FILE_PATH") + ERROR_COUNT=$(echo "$ERRORS" | wc -l | tr -d ' ') + harness_log "post-edit" "errors" "$BASENAME: ${ERROR_COUNT} error line(s)" + jq -nc --arg ctx "Lint/type errors in $BASENAME:"$'\n'"$ERRORS" \ + '{"additionalContext": $ctx}' +fi diff --git a/plugins/harness/scripts/pre-compact.sh b/plugins/harness/scripts/pre-compact.sh new file mode 100755 index 0000000..7f5dc72 --- /dev/null +++ b/plugins/harness/scripts/pre-compact.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +set -euo pipefail +source "$(dirname "$0")/lib/log.sh" 2>/dev/null || true +harness_start_timer 2>/dev/null || true + +# PreCompact hook (matcher: auto) -- preserve critical state before compaction +# Performance target: < 100ms + +# Read stdin (consume it) +cat > /dev/null 2>&1 || true + +# Require jq for output +command -v jq >/dev/null 2>&1 || exit 0 + +SESSION_ID="${CLAUDE_SESSION_ID:-$$}" +STATE_FILE="/tmp/harness-precompact-${SESSION_ID}.json" + +# Collect git state +BRANCH="" +COMMITS="[]" +MODIFIED=0 + +if command -v git >/dev/null 2>&1 && git rev-parse --is-inside-work-tree >/dev/null 2>&1; then + BRANCH=$(git branch --show-current 2>/dev/null || echo "detached") + COMMITS=$(git log --oneline 
-5 --format='%H' 2>/dev/null | jq -Rn '[inputs]' 2>/dev/null || echo '[]') + MODIFIED=$(git status --porcelain 2>/dev/null | wc -l | tr -d ' ') +fi + +# Write state file +jq -nc \ + --arg branch "$BRANCH" \ + --argjson commits "$COMMITS" \ + --argjson modified "$MODIFIED" \ + --argjson ts "$(date +%s)" \ + '{branch: $branch, commits: $commits, modified_count: $modified, timestamp: $ts}' \ + > "$STATE_FILE" 2>/dev/null || true + +harness_log "pre-compact" "saved" "$BRANCH" +exit 0 diff --git a/plugins/harness/scripts/session-start.sh b/plugins/harness/scripts/session-start.sh new file mode 100755 index 0000000..cfa32c2 --- /dev/null +++ b/plugins/harness/scripts/session-start.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash +set -euo pipefail +source "$(dirname "$0")/lib/log.sh" 2>/dev/null || true +source "$(dirname "$0")/lib/detect.sh" 2>/dev/null || true +harness_start_timer || true + +# SessionStart hook (matcher: startup) -- detect project environment +# Performance target: < 200ms + +# Read stdin +INPUT=$(cat) || exit 0 + +# Require jq +command -v jq >/dev/null 2>&1 || exit 0 + +# Parse source field; exit silently if not startup or bad JSON +SOURCE=$(echo "$INPUT" | jq -r '.source // empty' 2>/dev/null) || exit 0 +if [[ "$SOURCE" != "startup" ]]; then + harness_log "session-start" "skip" "source=$SOURCE" + exit 0 +fi + +context_parts=() + +# --- Detect project type and verification commands --- +PROJECT_TYPE=$(detect_project_type) +TEST_CMD=$(detect_test_cmd) +LINT_CMD=$(detect_lint_cmd) +BUILD_CMD=$(detect_build_cmd) + +if [[ -n "$PROJECT_TYPE" ]]; then + part="Project: $PROJECT_TYPE." 
+ if [[ -n "$TEST_CMD" ]]; then part="$part Test: \`$TEST_CMD\`."; fi + if [[ -n "$LINT_CMD" ]]; then part="$part Lint: \`$LINT_CMD\`."; fi + if [[ -n "$BUILD_CMD" ]]; then part="$part Build: \`$BUILD_CMD\`."; fi + context_parts+=("$part") +fi + +# --- Git status --- +if command -v git >/dev/null 2>&1 && git rev-parse --is-inside-work-tree >/dev/null 2>&1; then + BRANCH=$(git branch --show-current 2>/dev/null || echo "detached") + CHANGES=$(git status --porcelain 2>/dev/null | wc -l | tr -d ' ') + git_part="Branch: $BRANCH" + if [[ "$CHANGES" -gt 0 ]]; then git_part="$git_part (+$CHANGES uncommitted)"; fi + context_parts+=("$git_part.") +fi + +# --- Task list --- +if [[ -n "${CLAUDE_CODE_TASK_LIST_ID:-}" ]]; then + context_parts+=("Tasks may be available via Ctrl+T.") +fi + +# Output only if we have something useful +if [[ ${#context_parts[@]} -eq 0 ]]; then + harness_log "session-start" "silent" + exit 0 +fi + +CONTEXT=$(IFS=' '; echo "${context_parts[*]}") +harness_log "session-start" "inject" "detected: ${PROJECT_TYPE:-unknown}" +jq -nc --arg ctx "$CONTEXT" '{"additionalContext": $ctx}' diff --git a/plugins/harness/scripts/stop-gate.sh b/plugins/harness/scripts/stop-gate.sh new file mode 100755 index 0000000..49f98bd --- /dev/null +++ b/plugins/harness/scripts/stop-gate.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +set -euo pipefail +source "$(dirname "$0")/lib/log.sh" 2>/dev/null || true +source "$(dirname "$0")/lib/detect.sh" 2>/dev/null || true +harness_start_timer || true + +# Stop hook (blocking) -- prevent stopping with failing tests +# Performance target: < 30s (includes running tests) + +# Read stdin +INPUT=$(cat) || exit 0 + +# Require jq +command -v jq >/dev/null 2>&1 || exit 0 + +# Check stop_hook_active to prevent infinite loops +STOP_ACTIVE=$(echo "$INPUT" | jq -r '.stop_hook_active // false' 2>/dev/null) || exit 0 +if [[ "$STOP_ACTIVE" == "true" ]]; then + harness_log "stop-gate" "loop-prevention" + exit 0 +fi + +# detect_test_cmd and detect_lint_cmd 
provided by lib/detect.sh +TEST_CMD=$(detect_test_cmd) +LINT_CMD=$(detect_lint_cmd 2>/dev/null) || LINT_CMD="" + +# If no verification command found, don't block +if [[ -z "$TEST_CMD" && -z "$LINT_CMD" ]]; then + harness_log "stop-gate" "skip" "no verification command found" + exit 0 +fi + +# Run "$@" under a 30s limit. GNU timeout is missing on stock macOS; without this +# lookup the "command not found" status (127) would be reported as a test failure. +# Falls back to gtimeout (Homebrew coreutils), then to running without a limit. +run_with_timeout() { + if command -v timeout >/dev/null 2>&1; then + timeout 30 "$@" + elif command -v gtimeout >/dev/null 2>&1; then + gtimeout 30 "$@" + else + "$@" + fi +} + +BLOCK_MSG="" + +# Run tests with timeout (if test command exists) +if [[ -n "$TEST_CMD" ]]; then + TEST_OUTPUT=$(run_with_timeout bash -c "$TEST_CMD" 2>&1) || { + EXIT_CODE=$? + if [[ "$EXIT_CODE" -eq 124 ]]; then + harness_log "stop-gate" "block" "verification timed out" + echo "Verification timed out after 30s running: $TEST_CMD" >&2 + exit 2 + fi + # Substring, not `echo | head -c`: set -e is live in this handler, and on large + # output head would SIGPIPE echo under pipefail, exiting before the block + TRUNCATED="${TEST_OUTPUT:0:500}" + BLOCK_MSG="Tests failing. Output of \`$TEST_CMD\`:"$'\n'"$TRUNCATED" + } +fi + +# Run lint with timeout (if lint command exists) +if [[ -n "$LINT_CMD" ]]; then + LINT_OUTPUT=$(run_with_timeout bash -c "$LINT_CMD" 2>&1) || { + EXIT_CODE=$? + if [[ "$EXIT_CODE" -eq 124 ]]; then + harness_log "stop-gate" "block" "lint timed out" + echo "Lint timed out after 30s running: $LINT_CMD" >&2 + exit 2 + fi + LINT_TRUNCATED="${LINT_OUTPUT:0:500}" + if [[ -n "$BLOCK_MSG" ]]; then + BLOCK_MSG="${BLOCK_MSG}"$'\n\n'"Lint failing. Output of \`$LINT_CMD\`:"$'\n'"$LINT_TRUNCATED" + else + BLOCK_MSG="Lint failing. Output of \`$LINT_CMD\`:"$'\n'"$LINT_TRUNCATED" + fi + } +fi + +# Block if any failures +if [[ -n "$BLOCK_MSG" ]]; then + FIRST_LINE=$(echo "$BLOCK_MSG" | head -1) + harness_log "stop-gate" "block" "$FIRST_LINE" + echo "Cannot stop.
$BLOCK_MSG" >&2 + exit 2 +fi + +# All checks passed +harness_log "stop-gate" "pass" +exit 0 diff --git a/plugins/harness/scripts/task-gate.sh b/plugins/harness/scripts/task-gate.sh new file mode 100755 index 0000000..364e6d8 --- /dev/null +++ b/plugins/harness/scripts/task-gate.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +set -euo pipefail +source "$(dirname "$0")/lib/log.sh" 2>/dev/null || true +source "$(dirname "$0")/lib/detect.sh" 2>/dev/null || true +harness_start_timer 2>/dev/null || true + +# TaskCompleted hook (blocking) -- verify task completion claim is valid +# Performance target: < 30s + +# Read stdin (consume it) +cat > /dev/null 2>&1 || true +# Detect verification command +TEST_CMD=$(detect_test_cmd) + +# If no test command found, allow completion +[[ -n "$TEST_CMD" ]] || { harness_log "task-gate" "skip"; exit 0; } + +# Run "$@" under a 30s limit. GNU timeout is missing on stock macOS; without this +# lookup the "command not found" status (127) would be reported as failing tests. +# Falls back to gtimeout (Homebrew coreutils), then to running without a limit. +run_with_timeout() { + if command -v timeout >/dev/null 2>&1; then + timeout 30 "$@" + elif command -v gtimeout >/dev/null 2>&1; then + gtimeout 30 "$@" + else + "$@" + fi +} + +# Run tests with timeout +OUTPUT=$(run_with_timeout bash -c "$TEST_CMD" 2>&1) || { + EXIT_CODE=$? + if [[ "$EXIT_CODE" -eq 124 ]]; then + echo "Cannot mark task complete: test verification timed out after 30s running: $TEST_CMD" >&2 + harness_log "task-gate" "block" "timeout after 30s" + exit 2 + fi + # Substring, not `echo | head -c`: set -e is live in this handler, and on large + # output head would SIGPIPE echo under pipefail, exiting before the block + TRUNCATED="${OUTPUT:0:500}" + echo "Cannot mark task complete: tests are failing.
Output: $TRUNCATED" >&2 + harness_log "task-gate" "block" "$TRUNCATED" + exit 2 +} + +# Tests passed +harness_log "task-gate" "pass" +exit 0 diff --git a/plugins/harness/scripts/teammate-check.sh b/plugins/harness/scripts/teammate-check.sh new file mode 100755 index 0000000..24514ec --- /dev/null +++ b/plugins/harness/scripts/teammate-check.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +set -euo pipefail +source "$(dirname "$0")/lib/log.sh" 2>/dev/null || true +harness_start_timer 2>/dev/null || true + +# TeammateIdle hook (blocking) -- ensure teammates pick up remaining work +# Performance target: < 5s + +# Read stdin +INPUT=$(cat) || exit 0 + +# Require jq +command -v jq >/dev/null 2>&1 || exit 0 + +# Extract teammate name +TEAMMATE=$(echo "$INPUT" | jq -r '.teammate_name // empty' 2>/dev/null) || exit 0 + +# Instead of spawning a recursive claude session (expensive and potentially recursive), +# inject a reminder to the lead agent and allow the idle transition. +harness_log "teammate-check" "pass" "injecting idle reminder for $TEAMMATE" +jq -nc --arg name "$TEAMMATE" '{"additionalContext": ("Teammate " + $name + " going idle. 
Check if there are unclaimed tasks in the task list that should be assigned.")}' +exit 0 diff --git a/plugins/harness/scripts/verify-implementation.sh b/plugins/harness/scripts/verify-implementation.sh new file mode 100755 index 0000000..f21527f --- /dev/null +++ b/plugins/harness/scripts/verify-implementation.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +set -euo pipefail +source "$(dirname "$0")/lib/log.sh" 2>/dev/null || true +harness_start_timer || true + +# SubagentStop hook (matcher: implementer) -- verify implementer work is complete +# Performance target: < 100ms + +# Read stdin +INPUT=$(cat) || exit 0 + +# Require jq +command -v jq >/dev/null 2>&1 || exit 0 + +# Extract last assistant message +MESSAGE=$(echo "$INPUT" | jq -r '.last_assistant_message // empty' 2>/dev/null) || exit 0 +[[ -n "$MESSAGE" ]] || exit 0 + +# Check for indicators of incomplete work (case-insensitive) +MESSAGE_LOWER=$(echo "$MESSAGE" | tr '[:upper:]' '[:lower:]') + +INCOMPLETE_PATTERNS=( + "todo" + "not yet implemented" + "will do later" + "placeholder" + "stub" + "not implemented" + "left as an exercise" + "will be implemented" + "needs to be done" + "haven't implemented" + "skip for now" +) + +for pattern in "${INCOMPLETE_PATTERNS[@]}"; do + if echo "$MESSAGE_LOWER" | grep -qF "$pattern" 2>/dev/null; then + harness_log "verify-implementation" "block" "found marker: $pattern" + echo "Implementation appears incomplete. Please finish all TODO items before stopping." >&2 + exit 2 + fi +done + +harness_log "verify-implementation" "pass" +exit 0 diff --git a/plugins/harness/skills/deployment-monitor/SKILL.md b/plugins/harness/skills/deployment-monitor/SKILL.md new file mode 100644 index 0000000..a8e709d --- /dev/null +++ b/plugins/harness/skills/deployment-monitor/SKILL.md @@ -0,0 +1,21 @@ +--- +name: deployment-monitor +description: Monitor a deployment, CI pipeline, or PR status. Sets up recurring + checks and reports only on state changes. 
+disable-model-invocation: true +--- + +Monitor: $ARGUMENTS + +Set up a recurring check using CronCreate: +1. Determine what to monitor (CI status, PR review, deployment health) +2. Choose an appropriate interval (default: 5 minutes) +3. Create the cron job +4. Report the initial state + +The monitoring will continue until: +- You explicitly cancel it +- The monitored process completes (success or failure) +- The session ends + +Report only STATE CHANGES, not "still running" messages. diff --git a/plugins/harness/skills/init/SKILL.md b/plugins/harness/skills/init/SKILL.md new file mode 100644 index 0000000..6cfdde9 --- /dev/null +++ b/plugins/harness/skills/init/SKILL.md @@ -0,0 +1,38 @@ +--- +name: init +description: Initialize the harness for a project. Detects project type, creates + path-specific rules, and adds harness instructions to CLAUDE.md. Run once per + project to set up enhanced workflows. +disable-model-invocation: true +allowed-tools: Read, Write, Edit, Bash, Glob, Grep +--- + +Initialize the harness for this project: + +1. **Detect project type:** + - Check for package.json, Cargo.toml, go.mod, pyproject.toml, Makefile, etc. + - Identify languages used (by file extensions) + - Identify frameworks (by dependencies) + - Detect monorepo structure (multiple package managers at different depths) + +2. **Create path-specific rules:** + - For each detected language, create `.claude/rules/<language>.md` using the + templates in the plugin's rule-templates directory at `${CLAUDE_SKILL_DIR}/../../rule-templates/` + - Only create rules for languages actually present in the project + +3. **Add harness section to CLAUDE.md** (if CLAUDE.md exists, append; if not, create): + Use the template at `${CLAUDE_SKILL_DIR}/templates/claude-md-section.md` + +4.
**Detect and record verification commands:** + - Test command (npm test, cargo test, pytest, go test, make test) + - Lint command (eslint, ruff, clippy, golangci-lint) + - Build command (npm run build, cargo build, go build, make) + - Type check command (tsc --noEmit, mypy, cargo check) + +5. **Set up shared task list** for multi-session continuity: + - Suggest setting CLAUDE_CODE_TASK_LIST_ID in .claude/settings.local.json env + +6. **Report what was set up** concisely to the user. + +Do NOT create any files in a `.harness/` directory. Use native Claude Code +features (task list, auto memory, CLAUDE.md, .claude/rules/) for everything. diff --git a/plugins/harness/skills/init/templates/claude-md-section.md b/plugins/harness/skills/init/templates/claude-md-section.md new file mode 100644 index 0000000..78c4c82 --- /dev/null +++ b/plugins/harness/skills/init/templates/claude-md-section.md @@ -0,0 +1,15 @@ +## Harness + +When compacting, always preserve: current task context, acceptance criteria, +modified file list, and verification commands. + +### Verification +- Test: `{detected_test_command}` +- Lint: `{detected_lint_command}` +- Build: `{detected_build_command}` + +### Workflow +- For complex tasks, explore first (use researcher subagent), then plan, then implement +- Verify work before declaring completion (run tests) +- Use worktree isolation for parallel implementation +- Create tasks via TaskCreate to track multi-step work diff --git a/plugins/harness/skills/logs/SKILL.md b/plugins/harness/skills/logs/SKILL.md new file mode 100644 index 0000000..5a46a86 --- /dev/null +++ b/plugins/harness/skills/logs/SKILL.md @@ -0,0 +1,33 @@ +--- +name: logs +description: Summarize recent harness hook activity from .harness/logs/harness.log. + Shows hook invocations, decisions, durations, and patterns. Use to debug harness + behavior or understand what happened during a session. +disable-model-invocation: true +--- + +Summarize the harness activity log. 
+ +## Recent Log Entries (last 50) +!`tail -50 .harness/logs/harness.log 2>/dev/null || echo "No log file found at .harness/logs/harness.log"` + +## Hook Invocation Counts +!`awk -F'"hook":"' '{split($2,a,"\""); print a[1]}' .harness/logs/harness.log 2>/dev/null | sort | uniq -c | sort -rn || echo "No data"` + +## Decision Summary +!`awk -F'"decision":"' '{split($2,a,"\""); print a[1]}' .harness/logs/harness.log 2>/dev/null | sort | uniq -c | sort -rn || echo "No data"` + +## Slow Hooks (>1000ms) +!`awk -F'"duration_ms":' 'NF>1{split($2,a,"[,}]"); if(a[1]+0 > 1000) print}' .harness/logs/harness.log 2>/dev/null | tail -10 || echo "None"` + +## Blocks (exit 2 decisions) +!`grep '"decision":"block"' .harness/logs/harness.log 2>/dev/null | tail -10 || echo "None"` + +## Your Task + +Analyze the log data above and provide: +1. **Summary**: total hook invocations, time range covered +2. **Hot spots**: which hooks fire most, which are slowest +3. **Blocks**: what was blocked and why +4. **Patterns**: any repeated failures or unusual activity +5. **Recommendations**: anything that could be tuned or investigated diff --git a/plugins/harness/skills/progress-report/SKILL.md b/plugins/harness/skills/progress-report/SKILL.md new file mode 100644 index 0000000..85ddd12 --- /dev/null +++ b/plugins/harness/skills/progress-report/SKILL.md @@ -0,0 +1,17 @@ +--- +name: progress-report +description: Generate a comprehensive progress report showing completed tasks, + pending work, blockers, and overall project health. +disable-model-invocation: true +--- + +Generate a progress report for the current project. + +Include: +1. **Task summary**: completed / in-progress / pending (from task list) +2. **Recent activity**: last 10 git commits with file counts +3. **Test health**: run test suite, report pass/fail +4. **Code quality**: run lint, report issues +5. **Blockers**: any stuck tasks or recurring failures + +Format as a clear, concise summary the user can scan in 30 seconds. 
diff --git a/plugins/harness/skills/recovery/SKILL.md b/plugins/harness/skills/recovery/SKILL.md new file mode 100644 index 0000000..fc2d948 --- /dev/null +++ b/plugins/harness/skills/recovery/SKILL.md @@ -0,0 +1,27 @@ +--- +name: recovery +description: Recover from failure state by analyzing errors, optionally reverting + changes, and re-planning. Use when implementation is stuck or tests are + persistently failing. +--- + +Recover from the current failure state. + +Process: +1. Analyze recent git history for potentially problematic changes: + !`git log --oneline -10 2>/dev/null || echo "no git history"` + !`git diff --stat HEAD~3 2>/dev/null || echo "not enough history"` + +2. Check current test status: + !`{ if [ -f package.json ]; then npm test; elif [ -f Cargo.toml ]; then cargo test; elif command -v pytest >/dev/null 2>&1; then pytest; else echo "no test command found"; fi; } 2>&1 | tail -20` + +3. Based on findings: + - If a specific recent commit broke things: consider `git revert` + - If the approach is fundamentally wrong: discuss with user before reverting + - If it's a fixable error: fix it directly + - If tests are timing out: check for infinite loops or missing mocks + +4. After recovery, run verification to confirm the fix. + +NEVER revert commits without confirming with the user first if the revert +affects more than the current task. diff --git a/plugins/harness/skills/reflect/SKILL.md b/plugins/harness/skills/reflect/SKILL.md new file mode 100644 index 0000000..8d8700d --- /dev/null +++ b/plugins/harness/skills/reflect/SKILL.md @@ -0,0 +1,24 @@ +--- +name: reflect +description: After completing complex work, reflect on what could improve the + project's configuration. May suggest updates to CLAUDE.md, rules, or skills. + Use after major milestone completions. +user-invocable: false +context: fork +agent: architect +--- + +Review the work just completed and identify improvements: + +1. Were there patterns Claude kept getting wrong that a CLAUDE.md rule could prevent? +2. Were there verification steps that should be automated via rules? +3. 
Were there conventions that should be documented? +4. Were there subagent configurations that could be improved? + +Rules for suggestions: +- Only suggest changes that would prevent REAL problems observed during this work +- Do NOT add obvious or generic rules +- Do NOT add rules Claude already follows without being told +- Keep it concise -- every line in CLAUDE.md costs context + +If you identify genuinely useful improvements, suggest them. Otherwise, do nothing. diff --git a/plugins/harness/skills/requirements-interview/SKILL.md b/plugins/harness/skills/requirements-interview/SKILL.md new file mode 100644 index 0000000..ddf925c --- /dev/null +++ b/plugins/harness/skills/requirements-interview/SKILL.md @@ -0,0 +1,22 @@ +--- +name: requirements-interview +description: Interview the user to gather detailed requirements for underspecified + tasks. Uses AskUserQuestion for structured gathering. Use when a task is too vague + for its complexity level. +user-invocable: false +--- + +The user has requested: $ARGUMENTS + +This request needs clarification before implementation. Use the AskUserQuestion +tool to interview the user about: + +1. Technical constraints (language, framework, existing patterns to follow) +2. Scope boundaries (what's in/out of scope) +3. Edge cases they've considered +4. Acceptance criteria (how will we know it's done?) +5. Priority (which parts are most important?) + +Ask ONE question at a time. Keep questions concrete and offer choices where +possible. Stop interviewing when you have enough detail to create a clear +task decomposition. diff --git a/plugins/harness/skills/session-bridge/SKILL.md b/plugins/harness/skills/session-bridge/SKILL.md new file mode 100644 index 0000000..152d883 --- /dev/null +++ b/plugins/harness/skills/session-bridge/SKILL.md @@ -0,0 +1,20 @@ +--- +name: session-bridge +description: Orient the current session with live project state. 
Injects git branch, + recent commits, and working-tree changes to quickly understand where work left off. Use at session + start or when resuming long-running work. +user-invocable: false +--- + +## Current Project State + +- Git branch: !`git branch --show-current 2>/dev/null || echo "not a git repo"` +- Recent commits: !`git log --oneline -10 2>/dev/null || echo "no git history"` +- Uncommitted changes: !`git diff --stat HEAD 2>/dev/null | grep . || echo "none"` +- Untracked files: !`git ls-files --others --exclude-standard 2>/dev/null | head -10 | grep . || echo "none"` + +## Your Task + +Review the project state above. If there are recent commits suggesting +in-progress work, identify the next logical step. Check the task list +(Ctrl+T) for pending items. Resume the highest-priority incomplete work. diff --git a/plugins/harness/skills/spec-ingest/SKILL.md b/plugins/harness/skills/spec-ingest/SKILL.md new file mode 100644 index 0000000..8138c14 --- /dev/null +++ b/plugins/harness/skills/spec-ingest/SKILL.md @@ -0,0 +1,33 @@ +--- +name: spec-ingest +description: Ingest a specification document (PRD, design doc, requirements) and + decompose it into granular feature units with verification criteria. Produces + 50-200+ features for comprehensive coverage. +disable-model-invocation: true +context: fork +agent: architect +--- + +Ingest this specification and decompose it: $ARGUMENTS + +ultrathink about the feature decomposition. + +Process: +1. Read the entire specification document +2. Extract EVERY discrete feature, requirement, and acceptance criterion +3. Be extremely granular -- Anthropic research shows that a list of 200+ features helps prevent + premature completion declarations +4. For each feature, define: + - Category (functional, UI, API, data, integration, non-functional) + - Description (specific, testable) + - Verification steps (how to confirm it works) + - Priority (1 = must-have, 2 = important, 3 = nice-to-have) + - Dependencies (which features must come first) +5. Group features into modules +6. 
Create tasks via TaskCreate for each feature +7. Present a summary: total features, module breakdown, suggested implementation order + +CRITICAL: Features MUST be in a format that can be verified. Each feature +should describe observable behavior, not internal implementation details. + +For the feature schema, see [schema](templates/feature-schema.json). diff --git a/plugins/harness/skills/spec-ingest/templates/feature-schema.json b/plugins/harness/skills/spec-ingest/templates/feature-schema.json new file mode 100644 index 0000000..3a21e94 --- /dev/null +++ b/plugins/harness/skills/spec-ingest/templates/feature-schema.json @@ -0,0 +1,34 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "category": { + "type": "string", + "enum": ["functional", "ui", "api", "data", "integration", "non-functional"] + }, + "description": { + "type": "string", + "description": "Specific, testable description of the feature" + }, + "verification_steps": { + "type": "array", + "items": { "type": "string" }, + "description": "Steps to verify the feature works" + }, + "priority": { + "type": "integer", + "enum": [1, 2, 3], + "description": "1=must-have, 2=important, 3=nice-to-have" + }, + "depends_on": { + "type": "array", + "items": { "type": "string" }, + "description": "IDs of features that must be completed first" + }, + "module": { + "type": "string", + "description": "Logical module this feature belongs to" + } + }, + "required": ["category", "description", "verification_steps", "priority"] +} diff --git a/plugins/harness/skills/task-analyze/SKILL.md b/plugins/harness/skills/task-analyze/SKILL.md new file mode 100644 index 0000000..b3fc564 --- /dev/null +++ b/plugins/harness/skills/task-analyze/SKILL.md @@ -0,0 +1,26 @@ +--- +name: task-analyze +description: Analyze a task's complexity and recommend the execution strategy. 
Maps + to simple (direct), medium (subagent-assisted), complex (parallel subagents), or + massive (agent team/batch). Use automatically when evaluating new work. +user-invocable: false +--- + +Analyze this task for complexity: $ARGUMENTS + +Classification criteria: +| Level | Files | Signal | Strategy | +|---------|--------|-----------------------------------------------|---------------------| +| Simple | 1-3 | Bug fix, typo, config, single-file change | Direct execution | +| Medium | 4-10 | Feature, refactor, test addition | Researcher + plan | +| Complex | 11-30 | Cross-cutting change, multi-module feature | Parallel subagents | +| Massive | 31+ | Full spec, migration, new project from scratch| Agent team or /batch| + +Also assess: +- Does this need an interview? (vague request + medium+ complexity) +- Does this need a plan? (medium+ complexity) +- What effort level? (low/medium/high/max mapping to complexity) +- Are there cross-cutting concerns? (API + DB + UI + tests = complex) + +Report your classification naturally as part of your response. Do NOT use +harness terminology -- just act on the classification. diff --git a/plugins/harness/skills/task-decompose/SKILL.md b/plugins/harness/skills/task-decompose/SKILL.md new file mode 100644 index 0000000..a213471 --- /dev/null +++ b/plugins/harness/skills/task-decompose/SKILL.md @@ -0,0 +1,32 @@ +--- +name: task-decompose +description: Decompose a complex task into independent, parallelizable work units. + Each unit has clear acceptance criteria, file boundaries, and verification steps. + Use when work is classified as complex or massive. +context: fork +agent: architect +--- + +Decompose this task into independent work units: $ARGUMENTS + +ultrathink about the decomposition strategy. + +Rules: +1. Each unit MUST be independently verifiable +2. Units MUST have minimal file overlap (avoid merge conflicts in parallel work) +3. Each unit MUST have explicit acceptance criteria +4. 
Order units by dependency (note which are blocked by others) +5. Target 5-6 units per agent (from Anthropic agent teams best practices) +6. Each unit should be completable in a single focused session + +For each unit, specify: +- Description (one sentence) +- Files likely affected +- Acceptance criteria (testable, specific) +- Dependencies (which other units must complete first) +- Verification command + +After decomposition, create tasks via TaskCreate for each unit. + +For additional decomposition patterns, see [templates](templates/decomposition-template.md). +For examples of good decompositions, see [examples](examples/good-decompositions.md). diff --git a/plugins/harness/skills/task-decompose/examples/good-decompositions.md b/plugins/harness/skills/task-decompose/examples/good-decompositions.md new file mode 100644 index 0000000..d87fb3d --- /dev/null +++ b/plugins/harness/skills/task-decompose/examples/good-decompositions.md @@ -0,0 +1,31 @@ +# Example: Adding OAuth2 to an Express App + +## Unit 1: OAuth Configuration +- Files: `src/config/oauth.ts`, `.env.example` +- Criteria: OAuth config loads from env vars, validates required fields +- Depends on: none +- Verify: `npm test -- --grep 'oauth config'` + +## Unit 2: OAuth Routes +- Files: `src/routes/auth/oauth.ts`, `src/routes/auth/callback.ts` +- Criteria: GET /auth/google redirects to Google, GET /auth/callback handles response +- Depends on: Unit 1 +- Verify: `npm test -- --grep 'oauth routes'` + +## Unit 3: Session Integration +- Files: `src/middleware/session.ts`, `src/models/user.ts` +- Criteria: OAuth user is created/found, session is established +- Depends on: Unit 2 +- Verify: `npm test -- --grep 'oauth session'` + +## Unit 4: Frontend Integration +- Files: `src/components/LoginButton.tsx`, `src/hooks/useAuth.ts` +- Criteria: Google login button appears, redirects correctly, shows user after login +- Depends on: Unit 3 +- Verify: `npm test -- --grep 'login button'` + Chrome visual check + +## Unit 
5: Integration Tests +- Files: `tests/integration/oauth.test.ts` +- Criteria: Full OAuth flow works end-to-end +- Depends on: Units 1-4 +- Verify: `npm test -- --grep 'oauth integration'` diff --git a/plugins/harness/skills/task-decompose/templates/decomposition-template.md b/plugins/harness/skills/task-decompose/templates/decomposition-template.md new file mode 100644 index 0000000..5530d5d --- /dev/null +++ b/plugins/harness/skills/task-decompose/templates/decomposition-template.md @@ -0,0 +1,40 @@ +# Decomposition Template + +## Unit Template +```json +{ + "id": "unit-NNN", + "description": "One-sentence description of the work", + "files": ["src/path/file.ts"], + "acceptance_criteria": [ + "Specific testable criterion 1", + "Specific testable criterion 2" + ], + "depends_on": [], + "verification": "npm test -- --grep 'pattern'" +} +``` + +## Common Decomposition Patterns + +### Feature Implementation +1. Data model / types / interfaces +2. Business logic / service layer +3. API endpoint / controller +4. UI component +5. Tests for each layer +6. Integration test + +### Refactoring +1. Create new abstraction / interface +2. Implement new version +3. Migrate consumers one at a time +4. Remove old implementation +5. Update tests + +### Migration +1. Create migration script +2. Update data access layer +3. Update dependent services +4. Run migration +5. Verify data integrity diff --git a/plugins/harness/skills/verify-work/SKILL.md b/plugins/harness/skills/verify-work/SKILL.md new file mode 100644 index 0000000..7b7fbec --- /dev/null +++ b/plugins/harness/skills/verify-work/SKILL.md @@ -0,0 +1,25 @@ +--- +name: verify-work +description: Run comprehensive verification of all completed work. Checks tests, + lint, build, type errors, and acceptance criteria. Use before declaring any + significant work complete. +context: fork +agent: tester +--- + +Run comprehensive verification of the current project state. + +Checklist: +1. Run the test suite. Report pass/fail count. 
+2. Run the linter. Report error count. +3. Run the build (if applicable). Report success/failure. +4. Run type checking (if applicable). Report error count. +5. Check git status -- are there uncommitted changes that should be committed? +6. Review the task list -- are there tasks marked complete that have failing checks? + +For each failing check: +- Identify the specific failure +- Suggest the fix +- Estimate effort to fix + +Report results as a structured summary.