jlevy · jlevy · Feb 28, 2026 · Feb 27, 2026 · Feb 27, 2026 · Feb 27, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -56,5 +56,8 @@ jobs:
       - name: Run unit tests
         run: uv run pytest
 
-      - name: Run integration tests
-        run: ./tests/run.sh
+      - name: Check golden coverage
+        run: bash scripts/check-golden-coverage.sh
+
+      - name: Run golden tests
+        run: npx tryscript@latest run tests/tryscript/*.tryscript.md
diff --git a/.tbd/.gitignore b/.tbd/.gitignore
@@ -0,0 +1,21 @@
+# Installed documentation (regenerated on setup)
+docs/
+
+# Hidden worktree for tbd-sync branch
+data-sync-worktree/
+
+# Data sync directory (only exists in worktree)
+data-sync/
+
+# Local state
+state.yml
+
+# Migration backups (local only, not synced)
+backups/
+
+# Temporary files
+*.tmp
+*.temp
+
+# Keep workspaces/ tracked: it stores state (including outbox) committed to the working branch
+!workspaces/
diff --git a/.tbd/config.yml b/.tbd/config.yml
@@ -0,0 +1,93 @@
+tbd_format: f03
+tbd_version: 0.1.22
+display:
+  id_prefix: rpy
+sync:
+  branch: tbd-sync
+  remote: origin
+settings:
+  auto_sync: false
+  doc_auto_sync_hours: 24
+  use_gh_cli: true
+# Documentation cache configuration.
+# files: Maps destination paths (relative to .tbd/docs/) to source locations.
+#   Sources can be:
+#   - internal: prefix for bundled docs (e.g., "internal:shortcuts/standard/code-review-and-commit.md")
+#   - Full URL for external docs (e.g., "https://raw.githubusercontent.com/org/repo/main/file.md")
+# lookup_path: Search paths for doc lookup (like shell $PATH). Earlier paths take precedence.
+#
+# To sync docs: tbd sync --docs
+# To check status: tbd sync --status
+#
+# Auto-sync: Docs are automatically synced when stale (default: every 24 hours).
+# Configure with settings.doc_auto_sync_hours (0 = disabled).
+docs_cache:
+  lookup_path:
+    - .tbd/docs/shortcuts/system
+    - .tbd/docs/shortcuts/standard
+  files:
+    shortcuts/system/shortcut-explanation.md: internal:shortcuts/system/shortcut-explanation.md
+    shortcuts/system/skill-baseline.md: internal:shortcuts/system/skill-baseline.md
+    shortcuts/system/skill-brief.md: internal:shortcuts/system/skill-brief.md
+    shortcuts/system/skill-minimal.md: internal:shortcuts/system/skill-minimal.md
+    shortcuts/standard/agent-handoff.md: internal:shortcuts/standard/agent-handoff.md
+    shortcuts/standard/checkout-third-party-repo.md: internal:shortcuts/standard/checkout-third-party-repo.md
+    shortcuts/standard/code-cleanup-all.md: internal:shortcuts/standard/code-cleanup-all.md
+    shortcuts/standard/code-cleanup-docstrings.md: internal:shortcuts/standard/code-cleanup-docstrings.md
+    shortcuts/standard/code-cleanup-tests.md: internal:shortcuts/standard/code-cleanup-tests.md
+    shortcuts/standard/code-review-and-commit.md: internal:shortcuts/standard/code-review-and-commit.md
+    shortcuts/standard/coding-spike.md: internal:shortcuts/standard/coding-spike.md
+    shortcuts/standard/create-or-update-pr-simple.md: internal:shortcuts/standard/create-or-update-pr-simple.md
+    shortcuts/standard/create-or-update-pr-with-validation-plan.md: internal:shortcuts/standard/create-or-update-pr-with-validation-plan.md
+    shortcuts/standard/implement-beads.md: internal:shortcuts/standard/implement-beads.md
+    shortcuts/standard/merge-upstream.md: internal:shortcuts/standard/merge-upstream.md
+    shortcuts/standard/new-architecture-doc.md: internal:shortcuts/standard/new-architecture-doc.md
+    shortcuts/standard/new-guideline.md: internal:shortcuts/standard/new-guideline.md
+    shortcuts/standard/new-plan-spec.md: internal:shortcuts/standard/new-plan-spec.md
+    shortcuts/standard/new-qa-playbook.md: internal:shortcuts/standard/new-qa-playbook.md
+    shortcuts/standard/new-research-brief.md: internal:shortcuts/standard/new-research-brief.md
+    shortcuts/standard/new-shortcut.md: internal:shortcuts/standard/new-shortcut.md
+    shortcuts/standard/new-validation-plan.md: internal:shortcuts/standard/new-validation-plan.md
+    shortcuts/standard/plan-implementation-with-beads.md: internal:shortcuts/standard/plan-implementation-with-beads.md
+    shortcuts/standard/precommit-process.md: internal:shortcuts/standard/precommit-process.md
+    shortcuts/standard/review-code-python.md: internal:shortcuts/standard/review-code-python.md
+    shortcuts/standard/review-code-typescript.md: internal:shortcuts/standard/review-code-typescript.md
+    shortcuts/standard/review-code.md: internal:shortcuts/standard/review-code.md
+    shortcuts/standard/review-github-pr.md: internal:shortcuts/standard/review-github-pr.md
+    shortcuts/standard/revise-all-architecture-docs.md: internal:shortcuts/standard/revise-all-architecture-docs.md
+    shortcuts/standard/revise-architecture-doc.md: internal:shortcuts/standard/revise-architecture-doc.md
+    shortcuts/standard/setup-github-cli.md: internal:shortcuts/standard/setup-github-cli.md
+    shortcuts/standard/sync-failure-recovery.md: internal:shortcuts/standard/sync-failure-recovery.md
+    shortcuts/standard/update-specs-status.md: internal:shortcuts/standard/update-specs-status.md
+    shortcuts/standard/welcome-user.md: internal:shortcuts/standard/welcome-user.md
+    guidelines/backward-compatibility-rules.md: internal:guidelines/backward-compatibility-rules.md
+    guidelines/bun-monorepo-patterns.md: internal:guidelines/bun-monorepo-patterns.md
+    guidelines/cli-agent-skill-patterns.md: internal:guidelines/cli-agent-skill-patterns.md
+    guidelines/commit-conventions.md: internal:guidelines/commit-conventions.md
+    guidelines/convex-limits-best-practices.md: internal:guidelines/convex-limits-best-practices.md
+    guidelines/convex-rules.md: internal:guidelines/convex-rules.md
+    guidelines/electron-app-development-patterns.md: internal:guidelines/electron-app-development-patterns.md
+    guidelines/error-handling-rules.md: internal:guidelines/error-handling-rules.md
+    guidelines/general-coding-rules.md: internal:guidelines/general-coding-rules.md
+    guidelines/general-comment-rules.md: internal:guidelines/general-comment-rules.md
+    guidelines/general-eng-assistant-rules.md: internal:guidelines/general-eng-assistant-rules.md
+    guidelines/general-style-rules.md: internal:guidelines/general-style-rules.md
+    guidelines/general-tdd-guidelines.md: internal:guidelines/general-tdd-guidelines.md
+    guidelines/general-testing-rules.md: internal:guidelines/general-testing-rules.md
+    guidelines/golden-testing-guidelines.md: internal:guidelines/golden-testing-guidelines.md
+    guidelines/pnpm-monorepo-patterns.md: internal:guidelines/pnpm-monorepo-patterns.md
+    guidelines/python-cli-patterns.md: internal:guidelines/python-cli-patterns.md
+    guidelines/python-modern-guidelines.md: internal:guidelines/python-modern-guidelines.md
+    guidelines/python-rules.md: internal:guidelines/python-rules.md
+    guidelines/release-notes-guidelines.md: internal:guidelines/release-notes-guidelines.md
+    guidelines/tbd-sync-troubleshooting.md: internal:guidelines/tbd-sync-troubleshooting.md
+    guidelines/typescript-cli-tool-rules.md: internal:guidelines/typescript-cli-tool-rules.md
+    guidelines/typescript-code-coverage.md: internal:guidelines/typescript-code-coverage.md
+    guidelines/typescript-rules.md: internal:guidelines/typescript-rules.md
+    guidelines/typescript-sorting-patterns.md: internal:guidelines/typescript-sorting-patterns.md
+    guidelines/typescript-yaml-handling-rules.md: internal:guidelines/typescript-yaml-handling-rules.md
+    guidelines/writing-style-guidelines.md: internal:guidelines/writing-style-guidelines.md
+    templates/architecture-doc.md: internal:templates/architecture-doc.md
+    templates/plan-spec.md: internal:templates/plan-spec.md
+    templates/qa-playbook.md: internal:templates/qa-playbook.md
+    templates/research-brief.md: internal:templates/research-brief.md
diff --git a/Makefile b/Makefile
@@ -3,7 +3,7 @@
 
 .DEFAULT_GOAL := default
 
-.PHONY: default install lint format gendocs test update-golden upgrade build clean
+.PHONY: default install lint format gendocs test test-golden test-golden-coverage update-golden upgrade build clean
 
 default: install lint gendocs test
 
@@ -21,10 +21,17 @@ gendocs:
 
 test:
 	uv run pytest
-	./tests/run.sh
+	$(MAKE) test-golden-coverage
+	$(MAKE) test-golden
+
+test-golden:
+	npx tryscript@latest run tests/tryscript/*.tryscript.md
+
+test-golden-coverage:
+	bash scripts/check-golden-coverage.sh
 
 update-golden:
-	./tests/run.sh || cp tests/golden-tests-actual.log tests/golden-tests-expected.log
+	npx tryscript@latest run --update tests/tryscript/*.tryscript.md
 
 upgrade:
 	uv sync --upgrade --all-extras --dev

diff --git a/docs/development.md b/docs/development.md
@@ -46,8 +46,11 @@ make upgrade
 uv run pytest   # all tests
 uv run pytest -s tests/pytests.py  # one test, showing outputs
 
-# Run integration tests:
-./tests/run.sh
+# Run golden tests:
+npx tryscript@latest run tests/tryscript/*.tryscript.md
+
+# Check golden coverage gates:
+bash scripts/check-golden-coverage.sh
 
 # Update golden test baseline (when expected test output changes intentionally):
 make update-golden
@@ -88,37 +91,38 @@ and backup management.
 uv run pytest tests/pytests.py
 ```
 
-### Golden Tests (`tests/golden-tests.sh`)
+### Golden Tests (`tests/tryscript/*.tryscript.md`)
 
-Shell-based integration tests that exercise the full CLI. These tests capture CLI output
-and compare it against a committed baseline (`tests/golden-tests-expected.log`).
+Tryscript-based integration tests exercise the full CLI using fixture-first session
+files grouped by behavior (help/errors, replacements, renames/full mode, backup
+lifecycle, JSON, filters, and regex/case flows).
 
 **Running golden tests:**
 ```shell
-./tests/run.sh    # Runs tests and compares output to expected baseline
+npx tryscript@latest run tests/tryscript/*.tryscript.md
 ```
 
 **Updating the baseline when output changes intentionally:**
 ```shell
 make update-golden
 ```
 
+**Running golden quality gates:**
+```shell
+bash scripts/check-golden-coverage.sh
+```
+
 This is useful when:
 - Adding new CLI features that produce different output
-- Fixing bugs that change output format
-- Adding new test cases to `golden-tests.sh`
-
-The `run.sh` script:
-1. Copies `tests/work-dir` to `tests/tmp-dir` for isolation
-2. Runs `golden-tests.sh` in the temp directory
-3. Normalizes output (removes timestamps, line numbers, etc.)
-4. Compares against `golden-tests-expected.log`
+- Fixing bugs that change output format or lifecycle behavior
+- Adding new scenario modules or fixture flows
 
 **Adding new golden tests:**
-1. Edit `tests/golden-tests.sh` to add new test commands
-2. Run `make update-golden` to capture the new expected output
-3. Review the diff in `tests/golden-tests-expected.log`
-4. Commit both the script changes and the updated expected log
+1. Add or extend files under `tests/tryscript/` and `tests/tryscript/fixtures/`
+2. Run `make update-golden` to capture updated expected output
+3. Run `bash scripts/check-golden-coverage.sh` to enforce coverage/anti-pattern gates
+4. Review the markdown diff in the changed `.tryscript.md` files
+5. Commit scenario/fixture updates together
 
 ## IDE setup
 

diff --git a/docs/project/research/current/research-2026-02-27-golden-harness-strategy.md b/docs/project/research/current/research-2026-02-27-golden-harness-strategy.md
@@ -0,0 +1,66 @@
+# Research: Golden Harness Strategy for Rust Port Prep (2026-02-27)
+
+> Superseded (2026-02-27): This document captured a temporary keep-shell decision.
+> The repository has since migrated to tryscript as the authoritative golden harness.
+> See:
+> `docs/project/specs/active/plan-2026-02-27-repren-python-tryscript-full-migration.md`.
+
+## Purpose
+
+Decide whether to keep the existing shell golden harness (`tests/golden-tests.sh` +
+`tests/run.sh`) as the current parity baseline, or migrate immediately to a different
+harness format before Rust parity work accelerates.
+
+## Decision
+
+Keep the shell golden harness as the authoritative baseline for now.
+
+## Why keep it now
+
+1. It already covers broad end-to-end CLI behavior:
+   - replacements, renames, full mode, include/exclude, `--walk-only`
+   - backup lifecycle (`--undo`, `--clean-backups`, custom backup suffix)
+   - JSON output mode and error paths
+   - skill install and collision handling scenarios
+
+2. It is already normalized and deterministic in practice:
+   - `tests/run.sh` strips volatile paths/line numbers/timestamps
+   - baseline diffing is stable across reruns in this environment
+
+3. Rust porting work benefits from stable fixtures immediately:
+   - changing the harness now would add migration noise unrelated to behavior parity
+   - current harness output can be consumed directly to build Rust parity checks
+
+## Known limitations
+
+1. Shell scripts are harder to refactor and parameterize than structured test fixtures.
+2. Assertions are baseline-diff oriented, not strongly typed.
+3. Running in parallel with other harness runs can create temp-dir collisions.
+
+## Operational guidance for current harness
+
+1. Run `./tests/run.sh` serially (do not run in parallel with pytest jobs that invoke it).
+2. Treat `tests/golden-tests-expected.log` as a versioned contract artifact.
+3. Update baseline only when behavior change is intentional and documented.
+
+## Migration trigger conditions
+
+Revisit migration only when one or more are true:
+
+1. We need per-scenario selective execution to speed parity triage.
+2. Golden diffs become too noisy for maintainable review.
+3. Rust cross-language parity harness needs tighter, fixture-level assertions.
+
+## Deferred migration plan sketch
+
+If migration is needed later:
+
+1. Keep shell harness as source of truth during transition.
+2. Port one scenario group at a time into a structured harness.
+3. Compare new harness output against existing baseline for each migrated group.
+4. Only retire shell sections once parity and determinism are proven.
+
+## Related documents
+
+- `docs/project/specs/active/plan-2026-02-27-rust-port-prep-and-test-hardening.md`
+- `../repren-rs/docs/project/specs/active/plan-2026-02-27-repren-port-master-plan.md`