diff --git a/skills/integration/survey_queries_test.go b/skills/integration/survey_queries_test.go index 04f0580c..aa6a7c39 100644 --- a/skills/integration/survey_queries_test.go +++ b/skills/integration/survey_queries_test.go @@ -121,6 +121,22 @@ func runQuery(t *testing.T, db, query string) []string { return rows } +// execSQLite runs a non-query SQL statement (an UPDATE) against the fixture DB. The +// non-vacuousness sub-tests mutate a fresh fixture copy and re-run a query to prove an +// expected value FLIPS under the mutation — so the query is load-bearing, not a constant. +// The statement is fed to the sqlite3 CLI on stdin. The calling test is SKIPPED when sqlite3 +// is not on PATH and FAILED (with the command's combined output) when running it returns an error. +func execSQLite(t *testing.T, db, stmt string) { + t.Helper() + sqlite3, err := exec.LookPath("sqlite3") + if err != nil { + t.Skip("sqlite3 not on PATH") + } + cmd := exec.Command(sqlite3, db) + cmd.Stdin = strings.NewReader(stmt + "\n") + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("exec mutation against fixture: %v\n%s", err, out) + } +} + // TestSurveyQuerySmoke is the AC-2 query-smoke. It runs each labeled query from // skills/survey/references/queries.sql against a committed production-shaped fixture DB // and asserts the CORRECTED shape. Expected values come from the FIXTURE rows — an @@ -132,18 +148,21 @@ func TestSurveyQuerySmoke(t *testing.T) { db := buildFixtureDB(t) queries := loadLabeledQueries(t) - for _, name := range []string{"scoping", "codex-presence", "scaffold-usage", "work-by-area", "decision-open"} { + for _, name := range []string{ + "scoping", "codex-presence", "codex-scoped", "codex-workstreams", "codex-activity", + "scaffold-usage", "work-by-area", "decision-open", "mode-classification", + } { if _, ok := queries[name]; !ok { t.Fatalf("recommended-SQL reference is missing the %q query (have: %v)", name, sortedQueryNames(queries)) } } // scoping (#318): under the corrected git-root-basename model every in-repo checkout - // shares ONE `project` key, so COUNT(DISTINCT project) is structurally always 1 and - // `folded_keys` is gone. 
The cwd-prefix-union still does the load-bearing work — it - // counts the cwd-AT-root + subdir + worktree sessions (3) and EXCLUDES the same-basename - // sibling, the blank-cwd session, the out-of-repo session, and the codex rows. The row - // is the corrected 3-field shape: sessions|blank_cwd|span. + // shares ONE `project` key; the cwd-prefix-union does the load-bearing work — it counts + // the in-repo Claude sessions (cwd AT root, subdir, worktree, plus the F/G worktree-shape + // + the mode-classification track sessions, all under the prefix) and EXCLUDES the + // blank-cwd session, the out-of-repo session, and ALL the codex rows. The fixture has 9 + // in-repo Claude sessions: A,B,C + WT + issue-feed×2 + landing-copy×2 + mixed-bag. t.Run("scoping", func(t *testing.T) { rows := runQuery(t, db, queries["scoping"]) if len(rows) != 1 { @@ -153,8 +172,8 @@ func TestSurveyQuerySmoke(t *testing.T) { if len(fields) != 3 { t.Fatalf("scoping row should have 3 fields (sessions|blank_cwd|span) — folded_keys is dropped, got: %q", rows[0]) } - if fields[0] != "3" { - t.Errorf("the cwd-prefix should count 3 in-repo Claude sessions, got sessions=%q", fields[0]) + if fields[0] != "9" { + t.Errorf("the cwd-prefix should count 9 in-repo Claude sessions, got sessions=%q", fields[0]) } if fields[1] != "0" { t.Errorf("the blank-cwd Claude session is outside the prefix and must not count, got blank_cwd=%q", fields[1]) @@ -164,8 +183,8 @@ func TestSurveyQuerySmoke(t *testing.T) { // codex-presence (#69): Codex sessions land cwd='' (agentsview does not persist Codex // cwd), so the cwd-prefix scope misses them. This separate flagged count matches by // `project = :repo_project` ALONE — which means it also catches a same-basename SIBLING - // repo's Codex sessions (the documented collision). The fixture has two such rows (one - // in-repo, one same-basename sibling shape), both blank-cwd, so the count is 2 and + // repo's Codex sessions (the documented collision). 
The fixture has five such rows (four + // in-repo F* + one same-basename sibling G), all blank-cwd, so the count is 5 and // blank_cwd > 0. This is a presence flag, NOT a union — the scoping count below is // asserted UNCHANGED by these rows. t.Run("codex-presence", func(t *testing.T) { @@ -177,25 +196,118 @@ func TestSurveyQuerySmoke(t *testing.T) { if len(fields) != 2 { t.Fatalf("codex-presence row should have 2 fields (codex_sessions|blank_cwd), got: %q", rows[0]) } - if fields[0] != "2" { - t.Errorf("codex-presence should count 2 Codex sessions matching the repo project name (in-repo + same-basename sibling), got %q", fields[0]) + if fields[0] != "5" { + t.Errorf("codex-presence should count 5 Codex sessions matching the repo project name (4 in-repo F* + same-basename sibling G), got %q", fields[0]) } if fields[1] == "0" { t.Errorf("Codex cwd is unrecorded so blank_cwd must be > 0, got blank_cwd=%q", fields[1]) } }) + // codex-scoped (#321, AC-1): attributes Codex to THIS repo by exec_command.$.workdir + // prefix — DISTINCT from codex-presence's name-only match. The four F* sessions have an + // exec_command whose $.workdir is under /repo/proj (one is a worktree path), so they are + // IN scope; the sibling G's workdir is under /sibling/proj, so it is EXCLUDED. The count + // is 4 (the four F*), strictly fewer than codex-presence's 5 — proving the two signals + // MEASURE DIFFERENT THINGS (scoped ⊂ presence, sibling-free). (AC-1 illustrates the + // mechanism at "1 vs 2"; the clustering AC needs 4 attributed sessions, so the fixture + // scales to 4 vs 5 — the binding asserts, sibling-exclusion + the prefix-load-bearing + // flip, hold identically.) Non-vacuous: re-pointing G's workdir under /repo/proj flips + // the count 4→5, proving the prefix is load-bearing, not a constant. 
+	t.Run("codex-scoped", func(t *testing.T) {
+		// Baseline: the scoped query returns a single bare count row.
+		rows := runQuery(t, db, queries["codex-scoped"])
+		if len(rows) != 1 {
+			t.Fatalf("codex-scoped should return one count row, got %d: %v", len(rows), rows)
+		}
+		if rows[0] != "4" {
+			t.Errorf("codex-scoped should count 4 workdir-attributed Codex sessions (F* in-repo, sibling G excluded), got %q", rows[0])
+		}
+		// distinct from codex-presence (5) — the two signals differ on the same fixture.
+		// Guard the row count before indexing so a reshaped query fails the sub-test
+		// with a message instead of panicking the whole test binary.
+		pres := runQuery(t, db, queries["codex-presence"])
+		if len(pres) != 1 {
+			t.Fatalf("codex-presence should return one summary row, got %d: %v", len(pres), pres)
+		}
+		if presCount := strings.Split(pres[0], "|")[0]; presCount == rows[0] {
+			t.Errorf("codex-scoped (%q) must differ from codex-presence (%q) — scoped is the sibling-free subset", rows[0], presCount)
+		}
+		// non-vacuous: re-point sibling G's exec_command workdir UNDER the repo prefix → 4 becomes 5.
+		// The mutation runs against a FRESH fixture copy so the shared db stays pristine.
+		db2 := buildFixtureDB(t)
+		execSQLite(t, db2, `UPDATE tool_calls SET input_json='{"command":"go build","workdir":"/repo/proj"}' WHERE id=46;`)
+		flipped := runQuery(t, db2, queries["codex-scoped"])
+		if len(flipped) != 1 {
+			t.Fatalf("codex-scoped should still return one count row after the mutation, got %d: %v", len(flipped), flipped)
+		}
+		if flipped[0] != "5" {
+			t.Errorf("re-pointing the sibling's workdir under the repo prefix must flip codex-scoped 4→5 (prefix is load-bearing), got %q", flipped[0])
+		}
+	})
+
+	// codex-workstreams (#322, AC-3): clusters the codex-scoped sessions by the 3-case rule —
+	// dispatch-pattern → {TASK} (stage stripped), task/entity backtick → {TASK}, else
+	// (unlabeled). The expected labels are SUBSTRINGS of the fixture first_messages (an
+	// independent source — never written in SKILL.md), so a broken extractor reds. Non-vacuous:
+	// the stage suffix must be STRIPPED (journey-cost-ledger, NOT journey-cost-ledger-implementation),
+	// the two distinct dispatch tasks must NOT merge (codex-live-ci separate), the backtick
+	// task name must anchor past the leading reviewer-label backtick (orient-workflow-discovery,
+	// not 142-validation/Ensign), and (unlabeled) must sort LAST.
+	t.Run("codex-workstreams", func(t *testing.T) {
+		rows := runQuery(t, db, queries["codex-workstreams"])
+		// An empty result would make the trailing-row check below index rows[-1] and
+		// panic; the per-label assertions are non-fatal, so stop here with a clear message.
+		if len(rows) == 0 {
+			t.Fatalf("codex-workstreams returned no rows; expected 4 workstream buckets")
+		}
+		// Index the workstream|sessions rows by workstream label.
+		got := map[string]string{}
+		for _, r := range rows {
+			f := strings.Split(r, "|")
+			if len(f) != 2 {
+				t.Fatalf("codex-workstreams row should be workstream|sessions, got: %q", r)
+			}
+			got[f[0]] = f[1]
+		}
+		for _, want := range []string{"journey-cost-ledger", "orient-workflow-discovery", "codex-live-ci", "(unlabeled)"} {
+			if got[want] != "1" {
+				t.Errorf("workstream %q should cluster 1 session, got %q in %v", want, got[want], got)
+			}
+		}
+		if _, leaked := got["journey-cost-ledger-implementation"]; leaked {
+			t.Errorf("the dispatch stage suffix must be stripped — saw an un-stripped label in %v", got)
+		}
+		if _, leaked := got["142-validation/Ensign"]; leaked {
+			t.Errorf("the task/entity label must anchor past the leading reviewer-label backtick, got %v", got)
+		}
+		if len(got) != 4 {
+			t.Errorf("expected exactly 4 workstream buckets (3 named + unlabeled), got %v", got)
+		}
+		// (unlabeled) sorts last so the named tracks lead the rendered list.
+		trailing := rows[len(rows)-1]
+		if last := strings.Split(trailing, "|")[0]; last != "(unlabeled)" {
+			t.Errorf("(unlabeled) must sort last, got trailing row %q", trailing)
+		}
+	})
+
+	// codex-activity (#323): per-tool tally over the codex-scoped set — exec_command (4, one
+	// per F* session), update_plan (1), spawn_agent (1). The sibling G's exec_command must NOT
+	// count (it is outside the workdir prefix), proving the activity tally honors the same scope.
+ t.Run("codex-activity", func(t *testing.T) { + rows := runQuery(t, db, queries["codex-activity"]) + got := map[string]string{} + for _, r := range rows { + f := strings.Split(r, "|") + if len(f) != 2 { + t.Fatalf("codex-activity row should be tool|calls, got: %q", r) + } + got[f[0]] = f[1] + } + if got["exec_command"] != "4" { + t.Errorf("exec_command should tally 4 over the codex-scoped set (sibling G excluded), got %q in %v", got["exec_command"], got) + } + if got["update_plan"] != "1" { + t.Errorf("update_plan should tally 1, got %q in %v", got["update_plan"], got) + } + if got["spawn_agent"] != "1" { + t.Errorf("spawn_agent should tally 1, got %q in %v", got["spawn_agent"], got) + } + }) + // no-union (AC-2c): the added Codex rows must NOT inflate the Claude scope. The scoping - // query is asserted to 3 above (the same value the pre-Codex fixture yielded), proving - // Codex stays out of the Claude `sessions` count — a flagged presence, never a silent - // project union. + // query is asserted to 9 above (the Claude-only in-repo count), proving Codex stays out + // of the Claude `sessions` count — a flagged presence, never a silent project union. 
t.Run("codex-not-folded-into-scope", func(t *testing.T) { rows := runQuery(t, db, queries["scoping"]) if len(rows) != 1 { t.Fatalf("scoping should return one summary row, got %d: %v", len(rows), rows) } - if sessions := strings.Split(rows[0], "|")[0]; sessions != "3" { - t.Errorf("the Codex rows must not be folded into the Claude scope; scoping.sessions should stay 3, got %q", sessions) + if sessions := strings.Split(rows[0], "|")[0]; sessions != "9" { + t.Errorf("the Codex rows must not be folded into the Claude scope; scoping.sessions should stay 9, got %q", sessions) } }) @@ -224,27 +336,113 @@ func TestSurveyQuerySmoke(t *testing.T) { } }) - // work-by-area (#317.2): Edit/Write file_paths bucket by first package segment under - // the repo root; a path OUTSIDE the prefix buckets as (a reference, not - // this project's identity). + // work-by-area (#317.2, F-corrected / AC-7a): Edit/Write file_paths bucket by LOGICAL + // area after stripping any `.worktrees//` (or `.claude/worktrees//`) physical + // prefix — so a worktree `src/` edit and a main-checkout `src/` edit BOTH bucket as `src` + // (NOT `.worktrees`/``). A `kind` partition demotes genuine config + // (`.claude`/`.beads`/`.git`/``) WITHOUT filtering it (still counted), and the + // ORDER puts product areas FIRST. The fixture's `src` bucket has 4 edits: 2 worktree + // (render.ts, palette.ts) + main.ts + feed.ts — the worktree strip is what folds them. t.Run("work-by-area", func(t *testing.T) { rows := runQuery(t, db, queries["work-by-area"]) - got := map[string]string{} + kind := map[string]string{} + edits := map[string]string{} + var order []string + for _, r := range rows { + f := strings.Split(r, "|") + if len(f) != 3 { + t.Fatalf("work-by-area row should be area|kind|edits, got: %q", r) + } + kind[f[0]] = f[1] + edits[f[0]] = f[2] + order = append(order, f[0]) + } + // worktree src/ edits attribute to `src` ALONGSIDE the main-checkout src/ edit. 
+ if edits["src"] != "4" { + t.Errorf("the 2 worktree src/ edits + 2 main-checkout src/ edits should all bucket as src=4 (the strip folds them), got %q in %v", edits["src"], edits) + } + // a worktree src/ edit must NEVER leak into a `.worktrees` bucket (the strip is load-bearing). + if _, leaked := edits[".worktrees"]; leaked { + t.Errorf("a worktree edit must NOT bucket as `.worktrees` — the physical prefix must be stripped; got %v", edits) + } + // `.claude/worktrees//internal/codex.go` strips to `internal` (the second worktree layout). + if edits["internal"] != "4" { + t.Errorf("internal should count 4 (build.go, parse.go, index.go, the .claude/worktrees-stripped codex.go), got %q in %v", edits["internal"], edits) + } + // genuine config demotes to kind=config (still counted), NOT filtered. + for _, c := range []string{".claude", ".beads", ""} { + if kind[c] != "config" { + t.Errorf("%s should be tagged kind=config (demoted, still counted), got %q in %v", c, kind[c], kind) + } + } + if kind["src"] != "product" || kind["docs"] != "product" || kind["internal"] != "product" { + t.Errorf("product areas (src/docs/internal) should be tagged kind=product, got %v", kind) + } + // product leads: the first row must be a product area, never a config one. + if len(order) > 0 && kind[order[0]] != "product" { + t.Errorf("a product area must lead the work-by-area ordering, got leading %q (kind=%q)", order[0], kind[order[0]]) + } + // non-vacuous: re-point a worktree src/ edit to `.claude/` → it leaves `src` for the config footnote. 
+ db2 := buildFixtureDB(t) + execSQLite(t, db2, `UPDATE tool_calls SET input_json='{"file_path":"/repo/proj/.claude/render.ts"}' WHERE id=50;`) + rerows := runQuery(t, db2, queries["work-by-area"]) + reEdits := map[string]string{} + for _, r := range rerows { + f := strings.Split(r, "|") + reEdits[f[0]] = f[2] + } + if reEdits["src"] != "3" { + t.Errorf("re-pointing one worktree src/ edit to .claude/ must drop src 4→3, got %q in %v", reEdits["src"], reEdits) + } + if reEdits[".claude"] != "2" { + t.Errorf("the re-pointed edit must move to the .claude config bucket (1→2), got %q in %v", reEdits[".claude"], reEdits) + } + }) + + // mode-classification (#324, G / AC-8a): classify each TRACK (keyed by git_branch) into a + // work MODE from the per-track signal tallies (veto density, gate-pass ratio, loop markers, + // edit-kind). The fixture carries a MECHANICAL track (issue-feed: gate-pass, worktree loop, + // code edits, no veto), an EXPLORATION track (landing-copy: vetoes, a rejected path, .md + // edits), and a NEITHER-DOMINANT track (mixed-bag → unlabeled). The labels DERIVE from the + // signal rows (the independent oracle), never from SKILL.md text. Non-vacuous: (i) swapping + // the mechanical track's rows to carry high vetoes + a rejected path + prose flips its label + // to exploration; (ii) the neither-dominant track stays unlabeled (no guessed automation). 
+ t.Run("mode-classification", func(t *testing.T) { + rows := runQuery(t, db, queries["mode-classification"]) + mode := map[string]string{} for _, r := range rows { f := strings.Split(r, "|") if len(f) != 2 { - t.Fatalf("work-by-area row should be area|edits, got: %q", r) + t.Fatalf("mode-classification row should be track|mode, got: %q", r) } - got[f[0]] = f[1] + mode[f[0]] = f[1] } - if got["internal"] != "2" { - t.Errorf("two edits under internal/ should bucket as internal=2, got %q in %v", got["internal"], got) + if mode["issue-feed"] != "mechanical" { + t.Errorf("the gate-pass/worktree-loop/code track should classify mechanical, got %q in %v", mode["issue-feed"], mode) + } + if mode["landing-copy"] != "exploration" { + t.Errorf("the high-veto/rejected/prose track should classify exploration, got %q in %v", mode["landing-copy"], mode) + } + if mode["mixed-bag"] != "unlabeled" { + t.Errorf("a neither-dominant track must stay unlabeled (generic book-keeping, never a guessed automation pitch), got %q in %v", mode["mixed-bag"], mode) + } + // non-vacuous (i): swap issue-feed's signals (high veto + rejected path + prose) → flips to exploration. + db2 := buildFixtureDB(t) + execSQLite(t, db2, `UPDATE messages SET content='[Request interrupted by user]' WHERE session_id='claude:91111111-1111-1111-1111-111111111111';`) + execSQLite(t, db2, `UPDATE messages SET content='doesn''t want to proceed' WHERE session_id='claude:92222222-2222-2222-2222-222222222222' AND id=8;`) + execSQLite(t, db2, `UPDATE tool_calls SET result_content='The user doesn''t want to proceed with this tool use.' 
WHERE id=60;`) + execSQLite(t, db2, `UPDATE tool_calls SET input_json='{"file_path":"/repo/proj/content/a.md"}' WHERE id=61;`) + execSQLite(t, db2, `UPDATE tool_calls SET input_json='{"file_path":"/repo/proj/content/b.md"}' WHERE id=62;`) + flipped := map[string]string{} + for _, r := range runQuery(t, db2, queries["mode-classification"]) { + f := strings.Split(r, "|") + flipped[f[0]] = f[1] } - if got["skills"] != "1" { - t.Errorf("one write under skills/ should bucket as skills=1, got %q in %v", got["skills"], got) + if flipped["issue-feed"] != "exploration" { + t.Errorf("swapping the mechanical track's signals to the exploration signature must flip its label, got %q in %v", flipped["issue-feed"], flipped) } - if got[""] != "1" { - t.Errorf("the edit to a sibling repo outside the prefix should bucket as =1, got %q in %v", got[""], got) + if flipped["mixed-bag"] != "unlabeled" { + t.Errorf("the neither-dominant track must stay unlabeled under the signal swap, got %q in %v", flipped["mixed-bag"], flipped) } }) diff --git a/skills/integration/testdata/survey/fixture-sessions.sql b/skills/integration/testdata/survey/fixture-sessions.sql index e216555d..7c993130 100644 --- a/skills/integration/testdata/survey/fixture-sessions.sql +++ b/skills/integration/testdata/survey/fixture-sessions.sql @@ -18,10 +18,12 @@ -- - tool_calls.skill_name carries namespaced (`superpowers:*`), bare -- (`running-research-spikes`), and `spacedock:*` self rows — so the #319 family -- tally reports a `superpowers` family and EXCLUDES the dominant `spacedock` self --- rows. A `superpowers` family with no files on disk is the `recovered` case. --- - Edit/Write input_json.$.file_path rows under the repo root (an `internal` and a --- `skills` bucket) and one OUTSIDE it — so the #317.2 WORK-BY-AREA query buckets by --- package and flags the external-sibling path as ``. +-- rows. A `superpowers` family with no files on disk was invoked but not checked in. 
+-- - Edit/Write input_json.$.file_path rows: worktree `src/` edits + a main-checkout +-- `src/` edit (the #317.2 WORK-BY-AREA query strips `.worktrees//` and buckets +-- them all as `src`), `internal`/`docs` product areas, `.claude`/`.beads` config +-- (demoted via the `kind` partition, still counted), and an external-sibling path +-- OUTSIDE the prefix (flagged ``, also config-demoted). -- - decision rows: an answered AskUserQuestion (done), a rejected one (OPEN), and an -- ExitPlanMode "User has approved your plan" approval — so the #320 query marks the -- approved plan `done` (the cheap done-prefix fix) and the rejection OPEN. @@ -127,24 +129,95 @@ INSERT INTO sessions VALUES '/u/.claude/projects/-elsewhere-otherproj/eeeeeeee.jsonl', '2026-06-02', '2026-06-02', 'Unrelated project that shares the machine.', 4, 1); --- Codex session F — THIS repo's Codex history. Production shape: cwd='' (agentsview does --- not persist Codex cwd), project keyed by the git-root basename (`proj`). Matched by the --- #69 codex-presence query (project = :repo_project); EXCLUDED from the Claude scope. +-- ============================================================================ +-- CODEX SESSIONS — four attributed to THIS repo (F, F2, F3, F4) + one same-basename +-- SIBLING (G). All carry cwd='' (agentsview persists no Codex session cwd). Attribution +-- to this repo is by exec_command.$.workdir prefix (the codex-scoped query, #321), +-- NOT by project name. The four F* sessions get an exec_command row whose $.workdir is +-- under /repo/proj (codex-scoped counts them = 4); G's is under /sibling/proj (excluded). +-- All five carry project='proj' so codex-presence (name-only, #69) counts 5 — proving the +-- two signals differ (presence 5 ⊃ scoped 4). The four F* first_messages are real-shape so +-- the codex-workstreams clustering rule (#322) has a dispatch-pattern, a task/entity-pattern, +-- an unlabeled, and a SECOND distinct dispatch task to cluster. 
(AC-1's "1 vs 2" describes +-- the MECHANISM minimally; the clustering AC needs 4 attributed sessions, so the fixture +-- scales to scoped=4 / presence=5 — the binding asserts are sibling-excluded + the +-- prefix-load-bearing flip, which hold at 4/5 exactly as at 1/2.) +-- ============================================================================ + +-- Codex F — DISPATCH pattern → workstream `journey-cost-ledger` (stage suffix stripped). INSERT INTO sessions VALUES ('codex:ffffffff-8888-9999-aaaa-bbbbbbbbbbbb', 'proj', 'codex', '', '', '/u/.codex/sessions/rollout-ffffffff.jsonl', - '2026-06-04', '2026-06-04', 'A codex session in this repo; cwd unrecorded.', 4, 1); + '2026-06-04', '2026-06-04', + 'Read /tmp/spacedock-dispatch/spacedock-ensign-journey-cost-ledger-implementation.md and treat its content as your assignment.', + 4, 1); + +-- Codex F2 — TASK/ENTITY backtick pattern → workstream `orient-workflow-discovery`. A +-- leading reviewer-label backtick precedes the keyword so the rule must anchor on the +-- `Spacedock entity` token, not the first backtick globally. +INSERT INTO sessions VALUES + ('codex:f2f2f2f2-0000-1111-2222-333333333333', 'proj', 'codex', + '', '', + '/u/.codex/sessions/rollout-f2f2f2f2.jsonl', + '2026-06-04', '2026-06-04', + 'You are `142-validation/Ensign`, a fresh validation worker for Spacedock entity 142 `orient-workflow-discovery`. Working directory: /repo/proj.', + 4, 1); + +-- Codex F3 — UNLABELED. An encouragement/meta first_message carries no task → (unlabeled). +INSERT INTO sessions VALUES + ('codex:f3f3f3f3-4444-5555-6666-777777777777', 'proj', 'codex', + '', '', + '/u/.codex/sessions/rollout-f3f3f3f3.jsonl', + '2026-06-04', '2026-06-04', + 'You totally got this. Take your time. Captain asked me to tell subagents they are appreciated.', + 3, 1); + +-- Codex F4 — a SECOND distinct DISPATCH task → workstream `codex-live-ci`. Proves the +-- cluster key is the extracted {TASK}, not a constant: F4 must NOT merge with F. 
+INSERT INTO sessions VALUES + ('codex:f4f4f4f4-8888-9999-aaaa-bbbbbbbbbbbb', 'proj', 'codex', + '', '', + '/u/.codex/sessions/rollout-f4f4f4f4.jsonl', + '2026-06-04', '2026-06-04', + 'Read /tmp/spacedock-dispatch/spacedock-ensign-codex-live-ci-validation.md and treat its content as your assignment.', + 4, 1); -- Codex session G — a SAME-BASENAME SIBLING repo's Codex history. Its git-root basename is -- also `proj`, so it keys to the identical `project` and codex-presence CANNOT distinguish --- it from session F (the documented collision — the report states "match by project NAME --- only"). Blank cwd, like all Codex sessions. Still EXCLUDED from the Claude scope. +-- it from the F* sessions (the documented collision — the report states "match by project +-- NAME only"). Blank cwd, like all Codex sessions. Its exec_command $.workdir is under +-- /sibling/proj (OUTSIDE the repo prefix) so codex-scoped EXCLUDES it. Out of Claude scope. INSERT INTO sessions VALUES ('codex:11111111-2222-3333-4444-555555555555', 'proj', 'codex', '', '', '/u/.codex/sessions/rollout-11111111.jsonl', - '2026-06-04', '2026-06-04', 'A same-basename sibling repo codex session; cwd unrecorded.', 3, 1); + '2026-06-04', '2026-06-04', + 'Read /tmp/spacedock-dispatch/spacedock-ensign-sibling-task-implementation.md and treat its content as your assignment.', + 3, 1); + +-- ---------------------------------------------------------------------------- +-- CODEX exec_command rows carry $.workdir — the attribution signal (#321 codex-scoped) and +-- the per-session activity signal (#323 codex-activity). The four F* sessions' workdirs are +-- under /repo/proj (one is a worktree path, proving the prefix admits worktrees); G's is +-- under /sibling/proj. update_plan + spawn_agent rows exercise the activity tally. 
+-- ---------------------------------------------------------------------------- +INSERT INTO tool_calls (id, session_id, tool_name, input_json, result_content) VALUES + (40, 'codex:ffffffff-8888-9999-aaaa-bbbbbbbbbbbb', 'exec_command', + '{"command":"go test ./...","workdir":"/repo/proj/.worktrees/journey-cost-ledger"}', NULL), + (41, 'codex:ffffffff-8888-9999-aaaa-bbbbbbbbbbbb', 'update_plan', + '{"plan":[{"step":"explore","status":"completed"}]}', NULL), + (42, 'codex:f2f2f2f2-0000-1111-2222-333333333333', 'exec_command', + '{"command":"rg orient","workdir":"/repo/proj"}', NULL), + (43, 'codex:f3f3f3f3-4444-5555-6666-777777777777', 'exec_command', + '{"command":"ls","workdir":"/repo/proj/internal"}', NULL), + (44, 'codex:f4f4f4f4-8888-9999-aaaa-bbbbbbbbbbbb', 'exec_command', + '{"command":"git status","workdir":"/repo/proj"}', NULL), + (45, 'codex:f4f4f4f4-8888-9999-aaaa-bbbbbbbbbbbb', 'spawn_agent', + '{"task":"sub"}', NULL), + -- G (sibling): exec_command workdir is OUTSIDE the repo prefix → codex-scoped excludes it. + (46, 'codex:11111111-2222-3333-4444-555555555555', 'exec_command', + '{"command":"go build","workdir":"/sibling/proj"}', NULL); -- ---------------------------------------------------------------------------- -- DECISIONS (#320): answered (done), rejected (OPEN), ExitPlanMode approval (done). @@ -173,7 +246,7 @@ INSERT INTO tool_calls (id, session_id, tool_name, skill_name, input_json, resul -- ---------------------------------------------------------------------------- -- SCAFFOLD-USAGE (#319): Skill rows across families. spacedock:* dominates (self) and --- MUST be excluded; superpowers (namespaced + bare) survives as `recovered`. (session C) +-- MUST be excluded; superpowers (namespaced + bare) survives, invoked-but-not-on-disk. 
(session C) -- ---------------------------------------------------------------------------- INSERT INTO tool_calls (id, session_id, tool_name, skill_name, input_json, result_content) VALUES (10, 'claude:cccccccc-9999-aaaa-bbbb-cccccccccccc', 'Skill', 'spacedock:ensign', NULL, NULL), @@ -202,12 +275,135 @@ INSERT INTO tool_calls (id, session_id, tool_name, input_json, result_content) V (33, 'claude:cccccccc-9999-aaaa-bbbb-cccccccccccc', 'Edit', '{"file_path":"/sibling/otherlib/src/util.go"}', NULL); +-- ============================================================================ +-- WORK-BY-AREA WORKTREE-ATTRIBUTION (#317.2, F-corrected / AC-7a). A worktree-based +-- project (torahmap's `work-on-issue.sh` shape) drives the agent IN a worktree per issue, +-- so PRODUCT code lands under `.worktrees//…`. The corrected query strips the physical +-- `.worktrees//` prefix and buckets by the LOGICAL area, so a worktree `src/` edit +-- counts as `src` ALONGSIDE a main-checkout `src/` edit — NOT as `.worktrees`/``. +-- A `kind` partition demotes genuine config (`.claude`/`.beads`/`.git`/``) to a +-- footnote (still counted). Session WT's cwd IS a worktree (the torahmap shape). +-- ============================================================================ + +-- Claude session WT — a WORKTREE-cwd session (the torahmap work-on-issue shape). git_branch +-- `issue-42` is its track key. Edits two worktree `src/` files; carries the MECHANICAL +-- signature for mode-classification (gate-pass decision + worktree loop markers + code, no veto). 
+INSERT INTO sessions VALUES + ('claude:77777777-aaaa-bbbb-cccc-dddddddddddd', 'proj', 'claude', + '/repo/proj/.worktrees/issue-42', 'issue-42', + '/u/.claude/projects/-repo-proj-.worktrees-issue-42/77777777.jsonl', + '2026-06-07', '2026-06-07', 'Run the work-on-issue loop for issue 42 in its worktree.', 6, 2); + +-- Worktree-attribution Edit/Write rows: two worktree `src/` edits (strip to `src`), a +-- main-checkout `src/` edit (also `src` — all three bucket together), a `docs/` product +-- edit, a `.claude` (config-demote), a `.beads` (config-demote). The existing #266 rows add +-- an `internal` product bucket + an `` sibling (config-demote). A `.claude/worktrees/` +-- edit proves THAT prefix strips too (→ `internal`). +INSERT INTO tool_calls (id, session_id, tool_name, input_json, result_content) VALUES + (50, 'claude:77777777-aaaa-bbbb-cccc-dddddddddddd', 'Edit', + '{"file_path":"/repo/proj/.worktrees/issue-42/src/render.ts"}', NULL), + (51, 'claude:77777777-aaaa-bbbb-cccc-dddddddddddd', 'Edit', + '{"file_path":"/repo/proj/.worktrees/issue-42/src/palette.ts"}', NULL), + (52, 'claude:77777777-aaaa-bbbb-cccc-dddddddddddd', 'Write', + '{"file_path":"/repo/proj/src/main.ts"}', NULL), + (53, 'claude:77777777-aaaa-bbbb-cccc-dddddddddddd', 'Write', + '{"file_path":"/repo/proj/docs/spec.md"}', NULL), + (54, 'claude:77777777-aaaa-bbbb-cccc-dddddddddddd', 'Edit', + '{"file_path":"/repo/proj/.claude/memory.md"}', NULL), + (55, 'claude:77777777-aaaa-bbbb-cccc-dddddddddddd', 'Edit', + '{"file_path":"/repo/proj/.beads/tracker.db"}', NULL), + (56, 'claude:77777777-aaaa-bbbb-cccc-dddddddddddd', 'Edit', + '{"file_path":"/repo/proj/.claude/worktrees/wt9/internal/codex.go"}', NULL); + +-- WT's mechanical-signature decision (gate-pass) + an exploration-mode contrast follows. 
+INSERT INTO tool_calls (id, session_id, tool_name, input_json, result_content) VALUES + (57, 'claude:77777777-aaaa-bbbb-cccc-dddddddddddd', 'ExitPlanMode', + '{"plan":"Implement issue 42 in the worktree."}', + 'User has approved your plan. You can now start coding.'); + +-- ============================================================================ +-- MODE-CLASSIFICATION (#324, G / AC-8a). Two designed tracks plus an explicit +-- neither-dominant track. The classifier groups by git_branch and scores the signatures. +-- Track `issue-feed` — MECHANICAL: gate-pass decision, worktree/work-on-issue loop +-- markers, code edits, ZERO veto. +-- Track `landing-copy` — EXPLORATION: multiple [Request interrupted / doesn't-want vetoes, +-- a rejected/cancelled decision, `.md` content edits. +-- Track `mixed-bag` — NEITHER dominant: one veto + one passed decision + one `.md` edit, +-- so neither score wins by the margin → `unlabeled` (generic +-- book-keeping, never a guessed automation pitch). +-- (WT's `issue-42` is ALSO mechanical — a second mechanical track — but the G assertion +-- pins the three designed tracks explicitly.) +-- ============================================================================ + +-- MECHANICAL track `issue-feed` — two sessions. 
+INSERT INTO sessions VALUES + ('claude:91111111-1111-1111-1111-111111111111', 'proj', 'claude', + '/repo/proj', 'issue-feed', + '/u/.claude/projects/-repo-proj/91111111.jsonl', + '2026-06-07', '2026-06-07', 'Drive the issue-feed renderer via the work-on-issue loop.', 5, 2), + ('claude:92222222-2222-2222-2222-222222222222', 'proj', 'claude', + '/repo/proj/.worktrees/issue-feed', 'issue-feed', + '/u/.claude/projects/-repo-proj-.worktrees-issue-feed/92222222.jsonl', + '2026-06-07', '2026-06-07', 'Continue the issue-feed worktree implementation.', 4, 2); +INSERT INTO tool_calls (id, session_id, tool_name, input_json, result_content) VALUES + (60, 'claude:91111111-1111-1111-1111-111111111111', 'AskUserQuestion', + '{"questions":[{"header":"Reindex strategy","question":"Incremental vs full reindex?"}]}', + 'Your questions have been answered: "Incremental vs full reindex?"="incremental"'), + (61, 'claude:91111111-1111-1111-1111-111111111111', 'Edit', + '{"file_path":"/repo/proj/src/feed.ts"}', NULL), + (62, 'claude:92222222-2222-2222-2222-222222222222', 'Edit', + '{"file_path":"/repo/proj/internal/feed/index.go"}', NULL); + +-- EXPLORATION track `landing-copy` — two sessions, prose edits + vetoes + a rejected path. 
+INSERT INTO sessions VALUES + ('claude:a3333333-3333-3333-3333-333333333333', 'proj', 'claude', + '/repo/proj', 'landing-copy', + '/u/.claude/projects/-repo-proj/a3333333.jsonl', + '2026-06-07', '2026-06-07', 'Draft the landing hero copy; try a few framings.', 7, 4), + ('claude:a4444444-4444-4444-4444-444444444444', 'proj', 'claude', + '/repo/proj', 'landing-copy', + '/u/.claude/projects/-repo-proj/a4444444.jsonl', + '2026-06-07', '2026-06-07', 'Rework the story section; the last direction was wrong.', 6, 3); +INSERT INTO tool_calls (id, session_id, tool_name, input_json, result_content) VALUES + (63, 'claude:a3333333-3333-3333-3333-333333333333', 'AskUserQuestion', + '{"questions":[{"header":"Hero framing","question":"Hero-vs-story framing?"}]}', + 'The user doesn''t want to proceed with this tool use. The tool use was rejected.'), + (64, 'claude:a3333333-3333-3333-3333-333333333333', 'Write', + '{"file_path":"/repo/proj/content/hero.md"}', NULL), + (65, 'claude:a4444444-4444-4444-4444-444444444444', 'Write', + '{"file_path":"/repo/proj/content/story.md"}', NULL); + +-- NEITHER-DOMINANT track `mixed-bag` — one session, balanced signals → unlabeled. 
+INSERT INTO sessions VALUES + ('claude:b5555555-5555-5555-5555-555555555555', 'proj', 'claude', + '/repo/proj', 'mixed-bag', + '/u/.claude/projects/-repo-proj/b5555555.jsonl', + '2026-06-07', '2026-06-07', 'Some odds and ends across the repo.', 4, 2); +INSERT INTO tool_calls (id, session_id, tool_name, input_json, result_content) VALUES + (66, 'claude:b5555555-5555-5555-5555-555555555555', 'AskUserQuestion', + '{"questions":[{"header":"Odds and ends","question":"Which loose end first?"}]}', + 'Your questions have been answered: "Which loose end first?"="the docs"'), + (67, 'claude:b5555555-5555-5555-5555-555555555555', 'Write', + '{"file_path":"/repo/proj/docs/notes.md"}', NULL); + -- ---------------------------------------------------------------------------- --- Veto marker in session B's message stream (interruption signal, prose-read). +-- Veto + loop markers in the message stream (interruption + mechanical-loop signals, +-- prose-read). Session B carries the original veto; the G tracks carry their signatures. -- ---------------------------------------------------------------------------- INSERT INTO messages VALUES (1, 'claude:aaaaaaaa-1111-2222-3333-444444444444', 'user', 'Pick up the parser refactor and ship it.'), (2, 'claude:bbbbbbbb-5555-6666-7777-888888888888', 'user', 'Now wire up the regression suite.'), (3, 'claude:bbbbbbbb-5555-6666-7777-888888888888', 'user', '[Request interrupted by user]'), (4, 'claude:cccccccc-9999-aaaa-bbbb-cccccccccccc', 'user', 'Build the feature behind a worktree.'), - (5, 'codex:ffffffff-8888-9999-aaaa-bbbbbbbbbbbb', 'user', 'A codex session in this repo; cwd unrecorded.'); + (5, 'codex:ffffffff-8888-9999-aaaa-bbbbbbbbbbbb', 'user', 'A codex session in this repo; cwd unrecorded.'), + -- WT (issue-42): worktree loop marker, no veto → reinforces mechanical. 
+ (6, 'claude:77777777-aaaa-bbbb-cccc-dddddddddddd', 'user', 'Run the work-on-issue loop for issue 42 in its worktree.'), + -- issue-feed (mechanical): worktree/work-on-issue loop markers, no veto. + (7, 'claude:91111111-1111-1111-1111-111111111111', 'user', 'Drive the work-on-issue loop in the worktree.'), + (8, 'claude:92222222-2222-2222-2222-222222222222', 'user', 'Continue the worktree implementation.'), + -- landing-copy (exploration): repeated vetoes / doesn't-want-to-proceed steering. + (9, 'claude:a3333333-3333-3333-3333-333333333333', 'user', '[Request interrupted by user]'), + (10, 'claude:a3333333-3333-3333-3333-333333333333', 'user', 'doesn''t want to proceed — try a warmer tone'), + (11, 'claude:a4444444-4444-4444-4444-444444444444', 'user', '[Request interrupted by user] rethink the framing'), + -- mixed-bag (neither dominant): a single veto, balancing its one passed decision + one .md edit. + (12, 'claude:b5555555-5555-5555-5555-555555555555', 'user', '[Request interrupted by user]'); diff --git a/skills/survey/SKILL.md b/skills/survey/SKILL.md index ab77c361..c08f2c6a 100644 --- a/skills/survey/SKILL.md +++ b/skills/survey/SKILL.md @@ -10,7 +10,7 @@ user-invocable: true Survey is the first thing you run on unfamiliar ground: it reconstructs what the AI agents in this project have implicitly been doing, from their session history. It reports the inferred workflow, the workstreams, the recent decisions, and — load-bearing — the OPEN decisions (the abandoned or unanswered forks) plus how often the human had to step in. Then it offers to commission a real spacedock workflow with explicit gates from what it found. -It reads **agentsview**'s session DB and is strictly read-only — the recommended queries live in `references/queries.sql` (one labeled query per concern) so nothing is a black box. 
For now it surveys **Claude Code** history (the decision and interruption signals below are Claude's); agentsview also ingests Codex, Gemini, and more, and surfacing those agents' decision/scaffold/work signals is a deferred follow-up — the one exception is a flagged Codex *presence* count (step 2), since Codex sessions land with no recorded cwd and would otherwise vanish silently. The closing move is the discovery → commission bridge: the OPEN decisions become candidate gates, the workstreams become candidate entities, the inferred loop becomes the stage list. +It reads **agentsview**'s session DB and is strictly read-only — the recommended queries live in `references/queries.sql` (one labeled query per concern) so nothing is a black box. The decision and interruption signals below are **Claude Code**'s; **Codex** is surfaced too, as its own body section (a workdir-attributed count + workstream clusters + activity), since Codex sessions land with no recorded cwd and need the `exec_command.$.workdir` signal to be scoped to this repo. Gemini and per-file Codex work-by-area remain deferred follow-ups. The closing move is the discovery → commission bridge: the OPEN decisions become candidate gates, the workstreams become candidate entities, the inferred loop becomes the stage list — and the offer is keyed to each track's MODE (automation for mechanical tracks, book-keeping for exploration tracks). Run the four steps in order: **check agentsview → scan → recognize scaffold → report and offer**. 
@@ -96,16 +96,24 @@ run_query() { # run_query — :repo_root → REPO_ROOT, :repo_project printf ".param set :repo_root '%s'\n.param set :repo_project '%s'\n%s\n" "$REPO_ROOT" "$REPO_PROJECT" "$q" | sqlite3 "$DB" } -run_query scoping # #318 — sessions|blank_cwd|span over the cwd-prefix-scoped repo -run_query codex-presence # #69 — flagged Codex count|blank_cwd by project NAME (cwd unrecorded) -run_query scaffold-usage # #319 — behavioral skill_name family tally (spacedock self EXCLUDED) -run_query work-by-area # #317.2 — Edit/Write file_path bucketed by package (external = reference) -run_query decision-open # #320 — AskUserQuestion/ExitPlanMode frontier; OPEN sorts first +run_query scoping # #318 — sessions|blank_cwd|span over the cwd-prefix-scoped repo +run_query codex-presence # #69 — flagged Codex count|blank_cwd by project NAME (cwd unrecorded) +run_query codex-scoped # #321 — Codex attributed by exec_command.$.workdir prefix (sibling-free) +run_query codex-workstreams # #322 — cluster codex-scoped sessions into ensign-task workstreams +run_query codex-activity # #323 — exec_command/update_plan/spawn_agent tally over the codex-scoped set +run_query scaffold-usage # #319 — behavioral skill_name family tally (spacedock self EXCLUDED) +run_query work-by-area # #317.2 — Edit/Write file_path → LOGICAL area (worktree prefix stripped) + kind +run_query decision-open # #320 — AskUserQuestion/ExitPlanMode frontier; OPEN sorts first +run_query mode-classification # #324 — classify each git_branch track mechanical/exploration/unlabeled ``` `scoping` returns `sessions=0` → there is no Claude agent history for this repo; say so and stop. Nothing to discover. (The `scoping` stop is keyed to Claude history: a repo whose only agent history is Codex/Gemini still reports "no agent history" here — the Codex section renders only alongside a non-empty Claude body, and Gemini remains a deferred follow-up.) Note the `blank_cwd` count in the report if non-zero (sessions agentsview never captured a cwd for, which the repo-root scope cannot place). 
-`codex-presence` returns the count of `agent='codex'` sessions matching this repo's `project` name, plus the blank-cwd sum among them. Because agentsview does not persist Codex cwd, matches are by git-root-basename `project` only — a same-basename sibling repo will collide, so the count may include unrelated Codex sessions. Treat it as a presence flag only; these sessions are never counted in the Claude `scoping`, `scaffold-usage`, or `work-by-area` sets. When the count is `> 0`, the report renders a hint line (step 4) stating the count and that the match is by project NAME only. +`codex-presence` returns the count of `agent='codex'` sessions matching this repo's `project` name, plus the blank-cwd sum among them. Because agentsview does not persist Codex cwd, matches are by git-root-basename `project` only — a same-basename sibling repo will collide, so the count may include unrelated Codex sessions. Treat it as a presence flag only; these sessions are never counted in the Claude `scoping`, `scaffold-usage`, or `work-by-area` sets. The body Codex section is built from the `codex-scoped` set below instead — when `codex-presence > codex-scoped`, the gap is the name-only superset (possible sibling), and the report says so. + +**Codex body signals (`codex-scoped`, `codex-workstreams`, `codex-activity`).** agentsview persists no Codex session cwd, but a Codex session's `exec_command` tool calls carry `$.workdir` (the absolute working directory of each shell command). `codex-scoped` attributes a Codex session to THIS repo when it has an `exec_command` whose `$.workdir` is under the repo-root prefix — the workdir analogue of the Claude cwd-prefix scope, so it admits this repo's Codex and EXCLUDES a same-basename sibling (whose workdirs fall under a different prefix). 
Over that sibling-free set, `codex-workstreams` clusters the sessions into ensign-task workstreams from each `first_message` (the runnable 3-case rule: dispatch-file read → `{TASK}` with the stage suffix stripped; `Spacedock task/entity` backtick → the backtick-quoted `{TASK}`; else `(unlabeled)`), and `codex-activity` tallies the per-tool activity (`exec_command`/`update_plan`/`spawn_agent`). These are the Codex body section (step 4) — the workstreams surface real Codex tracks the Claude-only body misses. All from the agentsview DB; no raw-rollout parsing. (Per-file Codex work-by-area and a source-health signal are deferred — they need an upstream agentsview ingestion change.) + +**Track modes (`mode-classification`).** `mode-classification` labels each `git_branch` track `mechanical` (low veto + gate-pass-dominant + issue→worktree→PR loop markers + code edits), `exploration` (high veto + a rejected/cancelled path + prose/`.md` edits), or `unlabeled` (neither signature clearly dominant). The report reads the per-track `mode` to label WORKSTREAMS and to pick the right commission offer per track (step 4) — automation for mechanical, book-keeping for exploration, generic book-keeping for unlabeled (never a guessed automation pitch). **Honest signal accounting.** The `decision-open` rows are the human-decision points; `OPEN` = still needs the human, and you lead the report with those. For the interruption total, count the AskUserQuestion / ExitPlanMode decisions plus the hard-veto markers Claude sessions retain (`[Request interrupted` / `Request interrupted by user` / `doesn't want to proceed` in the message stream), over the same repo-scoped session set; `pct = total*100/user_turns`. Never dress an empty section up as "no decisions" — if a section is empty, say the run found none of that signal. 
@@ -122,13 +130,13 @@ Recognize the scaffold from TWO signals and reconcile them — a file probe (wha **Behavioral tally.** The `scaffold-usage` rows are a `family → invocations` tally normalized from `tool_calls.skill_name` (`superpowers:brainstorming` and the bare `running-research-spikes` both fold to family `superpowers`); the `spacedock` family is excluded because survey/ensign self-invocation otherwise dominates and would make every repo read as "uses spacedock". -**Join and state the fact.** For each family appearing in either signal, state two observed facts plainly: its invocation count (the behavioral tally) and whether it is checked in on disk (the file probe). Do not classify into buckets — state what is true. The one case worth naming explicitly is the one the file-only probe misses: a family invoked but absent from disk was **recovered from behavior, not files** — say exactly that, with the count. For example: +**Join and state the fact.** For each family appearing in either signal, state two observed facts plainly: its invocation count (the behavioral tally) and whether it is checked in on disk (the file probe). Do not narrate HOW the family was discovered (behavior vs files) — state only the usage + on-disk fact. For a family invoked but absent from disk, state the count and that it is not checked in. For example: -> `superpowers was recovered from behavior, not files: 186 skill invocations, but no checked-in .claude/skills/superpowers. Other recovered one-offs: plan-writing, using-git-worktrees, systematic-debug, simplify, debugging.` +> `superpowers: 186 invocations (not checked in). Other one-offs: plan-writing, using-git-worktrees, systematic-debug, simplify, debugging.` A family on disk and invoked is stated plainly (family + count + present on disk); a family on disk but never invoked is stated as installed-but-not-yet-invoked. The state-the-fact statement drives the comparative benefit in the report (step 4). 
The probe reads files; the *numbers* come from the scan (step 2). -## 4. Confirm, then report and offer +## 4. Report and offer Every `{slot}` below is a FILL slot: substitute the real value from the step-2 scan before you show the user. A literal `{slot}` (or a `<…>` angle token) left in what you present is a bug — never show the user an unfilled slot. If a slot has no data (e.g. zero OPEN decisions), drop that line rather than printing an empty slot. @@ -142,31 +150,38 @@ Every `{slot}` below is a FILL slot: substitute the real value from the step-2 s **Mandatory degrade.** When NO repo signal is available (not a git repo, or `git log` / PR lookup fails or is empty), the frontier degrades to transcript-only and EVERY OPEN fork is flagged **`unverified`** in the report — never silently presented as authoritative. The degrade is the default behavior, not an error. -Tell the user what you found and wait for a yes: - -> Found **{N} sessions** in `{project}` (`{date range}`), with **{D} decision points** and **{V} interruptions**. Want me to lay it out? +Lead with the one-line headline, then render the body DIRECTLY in the same turn — do NOT stop and ask first. The survey is read-only orientation: the body IS the value, and a pre-body confirm/menu is a round-trip with no decision behind it (and risks ending with no survey at all). The ONLY stop in this flow is the end-of-report commission OFFER (the real decision). 
So emit the headline and flow straight into the synthesis fence: -Then synthesize this, one screen: +> Found **{N} sessions** in `{project}` (`{date range}`), with **{D} decision points** and **{V} interruptions** — here's the lay of the land: ``` PROJECT: {basename} {sessions} Claude sessions · {date range} {if blank_cwd>0: {blank_cwd} uncaptured-cwd sessions} - {if codex-presence>0: {codex_sessions} Codex sessions match this repo by project NAME only (agentsview does not record Codex cwd) — may include a same-named sibling repo; the Claude-scoped body below does not cover them} + +CODEX (only if codex-scoped>0; workdir-attributed, distinct from the name-only presence flag) + {codex_scoped_sessions} Codex sessions attributed to this repo by exec_command working dir + {if codex-presence>codex-scoped: (codex-presence matches {codex_sessions} by project NAME only — may include a same-named sibling; the workdir-attributed count above is sibling-free)} + workstreams: {the codex-workstreams clusters — workstream → session count; (unlabeled) last} + activity: {the codex-activity tally — exec_command {n}, update_plan {n}, spawn_agent {n}} SCAFFOLD - {state-the-fact per family: family + invocation count + on-disk presence; call out a family invoked but not on disk as "recovered from behavior, not files"; or "none"} + {state-the-fact per family: family + invocation count + on-disk presence — e.g. "superpowers: 186 invocations (not checked in). 
Other one-offs: …"; or "none"} INFERRED WORKFLOW {the implicit loop across the decisions + prompts, as an arrow chain} — {one honest line} -WORKSTREAMS - {cluster the decisions + prompts into tracks; one line each, status glyph} +WORKSTREAMS mode + {cluster the decisions + prompts into tracks; one line each, status glyph + the mode-classification label (mechanical / exploration / unlabeled) per track} -WORK BY AREA (what this is — where edits land) - {the work-by-area buckets: package — {edits}; an bucket is edits to a sibling repo (a reference, not this project)} +WORK BY AREA (logical areas; worktree edits attributed to their area — F) + {the product work-by-area buckets (kind=product), by edit count: area — {edits}} + {if any kind=config: (+ {sum} edits in .claude/.beads/.git config + sibling refs, footnoted)} NEEDS YOU (only if any decision is still OPEN after the repo cross-check) - ⚠ {the true-open forks — never-decided questions; lead with them}{if degraded: each flagged unverified (no repo signal)} + {if any OPEN exploration track: exploration (tracked, prioritized — work you're holding, not bottlenecks):} + ◐ {the open EXPLORATION forks — deliberately-held threads} + {if any OPEN mechanical track: mechanical (automatable backlog — gate-and-drive candidates):} + ⚠ {the open MECHANICAL forks — never-decided questions}{if degraded: each flagged unverified (no repo signal)} BACKLOG (only if any fork was decided-not-shipped) {decided forks with no shipped artifact yet} @@ -174,30 +189,30 @@ BACKLOG (only if any fork was decided-not-shipped) RECENT DECISIONS (answered or shipped) {the rest: header — short question} -INTERRUPTIONS (where spacedock can help) - {total} times you stepped in across {sessions} sessions — {decisions} decision points - + {vetoes} course-corrections, {pct}% of your turns. 
+INTERRUPTIONS + {if any exploration track: exploration tracks: {n} steers across {m} sessions — this IS the work; book-keeping tracks the threads} + {if any mechanical track: mechanical tracks: {n} steps across {m} sessions — gates + autonomy would carry these between your calls} ``` ### The discovery → commission bridge (close every report with this) -After the synthesis, recognize the scaffold (step 3) and offer a COMPARABLE spacedock workflow, with a benefit stated **concretely and comparatively**, anchored to the actual scan numbers — never a placeholder, never a generic pitch. As in the synthesis above, every `{slot}` is a FILL slot: substitute the real step-2 number/forks before you show the user; a literal `{slot}` in your output is a bug. Use the per-scaffold framing: - -- **superpowers** is a library of disciplines an agent invokes (brainstorming → writing-plans → executing-plans → subagent-driven-development), with human interruption left implicit — *the human decides when to step in.* Offer a spacedock workflow that maps those disciplines to stages (ideation → implementation → validation) and makes the interruption points EXPLICIT gates. State it tied to the scan's interruption count: - > superpowers gives your agent the *plays* but leaves *when you step in* up to you — this scan counted **{V} interruptions across {N} sessions** where you had to. A spacedock workflow turns those into explicit approval gates, so the agent advances on its own between your calls and only stops where you marked a gate. +After the synthesis, offer spacedock — but key the offer to the MODE of each track (from the `mode-classification` query). Two modes call for two DIFFERENT things; do NOT make one undifferentiated automate-everything pitch. As in the synthesis above, every `{slot}` is a FILL slot: substitute the real step-2 numbers/forks/track-names before you show the user; a literal `{slot}` in your output is a bug. 
-- **gsd / get-shit-done** runs a fixed phase sequence per task, one task at a time. Offer a spacedock workflow that maps the gsd phases to stages and adds gates + durable entity state, so multiple work items move through the same phases concurrently and pause only at gates. State it tied to the OPEN forks: - > gsd drives one task through its phases; spacedock tracks every work item through the same stages as durable on-disk state, gates the steps you flagged as needing you (this scan found these OPEN forks: **{the actual OPEN decisions}**), and lets several run in parallel without you re-driving each. +**For the MECHANICAL tracks (mode=mechanical) — offer AUTOMATION.** These are disciplined routine execution (the issue→worktree→PR loop, routine implementation): gate the crucial decisions and let the agent drive the loop between gates. Keep the gate-and-autonomy pitch — it is CORRECT for these. State it tied to the scan (the mechanical tracks' names + their gate-pass count or the interruption count). The per-scaffold flavor sharpens the automation offer: + - **superpowers** maps its disciplines (brainstorming → writing-plans → executing-plans → subagent-driven-development) to stages with the interruption points made EXPLICIT gates. + - **gsd / get-shit-done** maps its fixed phases to stages + durable entity state so several work items move concurrently, pausing only at gates. + > For your MECHANICAL tracks (**{the mechanical track names}**): a spacedock workflow that gates the crucial decisions and lets the agent drive the loop between gates — these passed **{the gate-pass count}**, so the agent can carry them and stop only where you marked a gate. -- **similar / unknown scaffold** — name it (use the names the step-3 detection emitted), then offer the generic spacedock benefit (gates from the interruption count, entity state, parallelism) without inventing a false-specific comparison. 
+**For the EXPLORATION tracks (mode=exploration) — offer BOOK-KEEPING, never automation.** These are human-driven creative/exploratory work (writing/content, design exploration, steering an agent that drifts): the involvement IS the point. Offer spacedock as structure for the parallel threads — track each draft/path and its state (in-flight / paused-by-choice / abandoned) so several run in parallel without losing which is which. An open thread is tracked-prioritized work, NOT a bottleneck; a cancelled path is a valid tracked outcome, NOT a failure. The exploration offer MUST NOT contain "advances on its own", "without you re-driving each", "minimize involvement", or any automate-the-human-out framing. + > For your EXPLORATION tracks (**{the exploration track names}**): spacedock as book-keeping — track each draft/design path and its state (in-flight / paused-by-choice / abandoned) so you run several in parallel without losing which is which. The **{the cancelled-path count}** cancelled paths are tracked outcomes, not failures; the involvement is the point, so there's no automation here — just structure for the threads. -- **none** — offer the generic spacedock benefit anchored to the interruption count and OPEN forks. +**For UNLABELED tracks (mode=unlabeled) — generic book-keeping**, never a guessed automation pitch (the asymmetry favors not mis-offering: a missed automation offer is cheap; a wrong automation pitch at creative work is the misread to avoid). -The two comparisons MUST differ — superpowers-vs-spacedock (implicit-interruption → explicit-gates) is a different claim from gsd-vs-spacedock (single-task-phases → parallel-gated-entities). Each must cite a real scan number (the filled `{V}`/`{N}` or the filled OPEN forks), not a placeholder. +If a project carries BOTH modes, make BOTH offers (they MUST differ — the mechanical one keeps the gate-and-drive pitch; the exploration one carries none of the automate-the-human-out framing). 
If it carries only one mode, make only that offer. **none** scaffold → the generic spacedock benefit, mode-keyed the same way. Each offer must cite a real scan number (filled track names, gate-pass count, OPEN forks, or cancelled-path count), not a placeholder. Then make the offer: -> Want me to commission a spacedock workflow from this? +> Want me to commission a spacedock workflow from this{if both modes: — gated automation for the mechanical tracks, thread book-keeping for the exploration tracks}? On a **yes**, invoke commission in batch mode, supplying inputs derived from the scan (commission already accepts batch design inputs in its first message — see its Batch Mode). Assemble: @@ -215,6 +230,7 @@ On a **no**, stop — the survey stands on its own as an orientation. - **Project name** = path basename. - **Workflow + workstreams: infer them**, primarily from the decisions (the `PROMPTS` are sparse/noisy — secondary). Be honest when a track is one-off or stalled. - **Decisions + stats are data, not invention.** `OPEN` = still needs the human; lead the report with the true-open forks. The transcript scan can't tell shipped from open — that's what the step-4 repo cross-check is for; drop a fork to "shipped" only on a confident match, and flag the whole frontier `unverified` when there's no repo signal. -- **Work-by-area is identity, not a to-do list.** The `work-by-area` buckets say WHAT this project is (where edits land); an `` bucket is edits to a sibling repo — a reference, not this project's identity. Report it separately from the decision frontier (where you stop). +- **Work-by-area is identity, not a to-do list.** The `work-by-area` buckets say WHAT this project is (where edits land), by LOGICAL area regardless of physical location — a worktree edit is attributed to its area (a worktree `src/` edit is `src`), so worktree-based product work is not hidden. 
The lead lists product areas (`kind=product`) by edit count; genuine config (`.claude`/`.beads`/`.git`) and an `<external>` sibling-repo path demote to a footnote (`kind=config`) — still counted, just not the project's identity. Report it separately from the decision frontier (where you stop). +- **Two work modes, two offers.** `mode-classification` labels each track mechanical / exploration / unlabeled. Mechanical tracks (the issue→worktree→PR loop) get the automation offer (gate-and-drive); exploration tracks (creative/content/design steering) get the book-keeping offer (track the parallel threads + their states) — the involvement IS the point, so NO automation pitch for them; an unlabeled track gets generic book-keeping, never a guessed automation pitch. - **Fill every slot, never invent.** Every `{slot}` in the report and the comparison comes from the step-2 numbers; a literal `{slot}` shown to the user is a bug. If a section's signal is empty (no OPEN decisions, no interruptions, no edits), say the run found none — never dress an empty section up as "no decisions." -- **Claude-only body, with a flagged Codex presence count.** The report body (workflow, workstreams, decisions, work-by-area, scaffold) is built from Claude history; Codex/Gemini decision/scaffold/work signals are a deferred follow-up. The lone Codex signal is the step-2 presence count, rendered as the step-4 hint line when `> 0` — a count and a name-only-match caveat, nothing more. A repo whose ONLY history is Codex still reports "no agent history" at the `scoping=0` stop (the Claude body has nothing); don't imply otherwise beyond what step 2 reports. +- **Claude body + a Codex body section.** The Claude body (workflow, workstreams, decisions, work-by-area, scaffold) is built from Claude history. 
Codex is surfaced too, as its own section (step 4) from the `codex-scoped` set: the workdir-attributed count + the workstream clusters + the activity tally (Gemini and per-file Codex work-by-area remain deferred follow-ups). A repo whose ONLY history is Codex still reports "no agent history" at the `scoping=0` stop (the Claude body has nothing); the Codex section renders alongside a non-empty Claude body, not in place of it. diff --git a/skills/survey/references/queries.sql b/skills/survey/references/queries.sql index 1d0449fb..8c10bcfc 100644 --- a/skills/survey/references/queries.sql +++ b/skills/survey/references/queries.sql @@ -69,6 +69,109 @@ FROM sessions WHERE agent = 'codex' AND project = :repo_project; +-- name: codex-scoped +-- #321 — Codex attributed to THIS repo by exec_command.$.workdir prefix (DISTINCT from +-- codex-presence). agentsview persists no Codex session cwd, but Codex's `exec_command` +-- tool calls DO carry `$.workdir` — the absolute working directory of the shell command. +-- Scope a Codex session to this repo when it has at least one `exec_command` whose +-- `$.workdir` is `:repo_root` or strictly under it — the workdir analogue of the Claude +-- cwd-prefix scope. This ADMITS this repo's Codex and EXCLUDES a same-basename sibling +-- (whose workdirs fall under a different absolute prefix), so the count is sibling-free +-- where codex-presence (project NAME only) is not. The two stay distinct: codex-presence +-- is the name-matched superset (may include a sibling); codex-scoped is the workdir-attributed +-- subset. A Codex session whose only workdirs are outside the prefix (a /tmp run) or that +-- has no usable workdir (a near-empty session) is conservatively excluded — same discipline +-- as the Claude blank-cwd exclusion. 
+SELECT COUNT(*) AS codex_scoped_sessions
+FROM sessions s
+WHERE s.agent = 'codex'
+  -- EXISTS (not a JOIN) so a session with many matching exec_command calls counts once.
+  AND EXISTS (
+    SELECT 1 FROM tool_calls t
+    WHERE t.session_id = s.id
+      AND t.tool_name = 'exec_command'
+      -- `=` admits a command run AT the repo root; the `'/%'` suffix admits anything
+      -- beneath it while keeping a path that merely shares the string prefix
+      -- (e.g. `/repo/proj-old`) out of the set.
+      -- NOTE(review): SQLite LIKE is ASCII case-insensitive by default and treats any
+      -- `%`/`_` inside :repo_root as wildcards — confirm that is acceptable for this scope.
+      AND (json_extract(t.input_json, '$.workdir') = :repo_root
+           OR json_extract(t.input_json, '$.workdir') LIKE :repo_root || '/%')
+  );
+
+-- name: codex-workstreams
+-- #322 — cluster the codex-scoped sessions into ensign-task WORKSTREAMS from each session's
+-- first_message. This is the runnable artifact for the clustering rule (NOT prose): three
+-- ordered cases, in order —
+--   1. Dispatch pattern — first_message reads a dispatch file
+--      (`…/spacedock-dispatch/spacedock-ensign-{TASK}-{stage}.md`); the workstream is {TASK},
+--      with the trailing `-ideation`/`-implementation`/`-validation` stage stripped and the
+--      label anchored on the `.md` boundary so trailing prose does not leak in.
+--   2. Task/entity pattern — first_message names a `Spacedock task`/`Spacedock entity`; the
+--      label is the backtick-quoted {TASK} that FOLLOWS that keyword (anchor on the keyword,
+--      not the first backtick globally, so a leading reviewer-label backtick does not leak).
+--   3. Unlabeled — a null first_message or an encouragement/meta message ("Captain asked me
+--      to tell subagents…", "You totally got this…") carries no task; it falls to an honest
+--      `(unlabeled)` bucket — counted, never invented a name for.
+-- Same `EXISTS exec_command workdir-under-prefix` scope as codex-scoped (the clustering only
+-- runs over the attributed set). `(unlabeled)` sorts last so the named tracks lead.
+-- Implementation notes:
+--   * Every position comes from the case-sensitive instr(); a label is only emitted when
+--     the anchor AND its closing delimiter were actually found, so a missing `.md`, an
+--     unclosed backtick, or an empty capture falls through to `(unlabeled)` instead of
+--     producing an empty-string or garbage workstream.
+--   * Case 2 anchors on the `Spacedock task` / `Spacedock entity` keyword itself (task
+--     preferred when both occur), not on the first generic `Spacedock ` in the message,
+--     so a backtick that precedes the keyword cannot leak in as the label.
+--   * The stage suffix is removed only when it is the END of the dispatch stem, so a task
+--     whose own name contains `-validation` (etc.) is not mangled.
+WITH scoped AS (
+  -- The workdir-attributed Codex sessions (same scope as codex-scoped).
+  SELECT s.id, s.first_message AS fm
+  FROM sessions s
+  WHERE s.agent = 'codex'
+    AND EXISTS (
+      SELECT 1 FROM tool_calls t
+      WHERE t.session_id = s.id
+        AND t.tool_name = 'exec_command'
+        AND (json_extract(t.input_json, '$.workdir') = :repo_root
+             OR json_extract(t.input_json, '$.workdir') LIKE :repo_root || '/%')
+    )
+),
+anchored AS (
+  -- 1-based anchor positions; marker_pos is 0 and kw_pos is NULL when absent
+  -- (both are NULL for a NULL first_message).
+  SELECT id, fm,
+         instr(fm, 'spacedock-dispatch/spacedock-ensign-') AS marker_pos,
+         COALESCE(NULLIF(instr(fm, 'Spacedock task `'), 0),
+                  NULLIF(instr(fm, 'Spacedock entity '), 0)) AS kw_pos
+  FROM scoped
+),
+tails AS (
+  SELECT id,
+         -- Text AFTER the dispatch-file marker (case 1).
+         CASE WHEN marker_pos > 0
+              THEN substr(fm, marker_pos + length('spacedock-dispatch/spacedock-ensign-'))
+         END AS after_marker,
+         -- Text AFTER the first backtick that follows the keyword (case 2).
+         CASE WHEN kw_pos IS NOT NULL AND instr(substr(fm, kw_pos), '`') > 0
+              THEN substr(substr(fm, kw_pos), instr(substr(fm, kw_pos), '`') + 1)
+         END AS after_tick
+  FROM anchored
+),
+candidates AS (
+  SELECT id,
+         -- `{TASK}-{stage}`: everything up to the `.md` boundary; NULL when there is no
+         -- `.md` or nothing precedes it.
+         CASE WHEN instr(after_marker, '.md') > 1
+              THEN substr(after_marker, 1, instr(after_marker, '.md') - 1)
+         END AS stem,
+         -- The backtick-quoted label; NULL when the quote is unclosed or empty.
+         CASE WHEN instr(after_tick, '`') > 1
+              THEN substr(after_tick, 1, instr(after_tick, '`') - 1)
+         END AS quoted
+  FROM tails
+),
+labeled AS (
+  SELECT id,
+         COALESCE(
+           -- Case 1: the dispatch stem with a TRAILING stage suffix removed. A negative
+           -- substr() start counts from the right, so this tests the end of the stem.
+           NULLIF(
+             CASE
+               WHEN substr(stem, -length('-ideation')) = '-ideation'
+                 THEN substr(stem, 1, length(stem) - length('-ideation'))
+               WHEN substr(stem, -length('-implementation')) = '-implementation'
+                 THEN substr(stem, 1, length(stem) - length('-implementation'))
+               WHEN substr(stem, -length('-validation')) = '-validation'
+                 THEN substr(stem, 1, length(stem) - length('-validation'))
+               ELSE stem
+             END,
+             ''),
+           -- Case 2: the backtick-quoted task/entity name.
+           quoted,
+           -- Case 3: nothing usable — counted, never given an invented name.
+           '(unlabeled)'
+         ) AS workstream
+  FROM candidates
+)
+SELECT workstream, COUNT(*) AS sessions
+FROM labeled
+GROUP BY workstream
+ORDER BY (workstream = '(unlabeled)') ASC, sessions DESC, workstream ASC;
+
+-- name: codex-activity
+-- #323 — per-tool ACTIVITY tally over the codex-scoped set. Codex's tool surface is its own
+-- shape: `exec_command` (shell commands — reads dominate; this is also the $.workdir signal),
+-- `update_plan` (STRUCTURED plan-step progression — a real decision/workstream signal),
+-- `spawn_agent` (sub-agent fan-out — Codex multi-agent). The report renders these as the
+-- Codex activity summary (a coherent DB signal, NOT raw-rollout parsing). Same workdir-prefix
+-- scope as codex-scoped, so the tally covers only this repo's attributed Codex.
+SELECT tc.tool_name AS tool, COUNT(*) AS calls  -- one output row per tallied Codex tool
+FROM tool_calls AS tc
+INNER JOIN sessions AS sess ON sess.id = tc.session_id
+WHERE tc.tool_name IN ('exec_command', 'update_plan', 'spawn_agent')
+  AND sess.agent = 'codex'
+  AND EXISTS (  -- the session has at least one exec_command run at/under :repo_root
+    SELECT 1 FROM tool_calls AS probe
+    WHERE probe.tool_name = 'exec_command'
+      AND probe.session_id = sess.id
+      AND (json_extract(probe.input_json, '$.workdir') LIKE :repo_root || '/%'
+           OR json_extract(probe.input_json, '$.workdir') = :repo_root)
+  )
+GROUP BY tc.tool_name
+ORDER BY calls DESC;  -- busiest tool first
+
 -- name: scaffold-usage
 -- #319 / #317.1 — behavioral scaffold tally. GROUP the tool_calls.skill_name signal by
 -- FAMILY over the repo-scoped Claude session set: split on the first ':' (namespaced
@@ -78,7 +181,7 @@ WHERE agent = 'codex'
 -- is EXCLUDED — survey/ensign self-invocation dominates the tally (1026/1045 on this
 -- repo) and would make every surveyed repo read as "uses spacedock". This is the
 -- behavioral half the file-only probe misses: a family that appears here with no files
--- on disk is a `recovered` scaffold.
+-- on disk was invoked but not checked in (the report states the usage + presence fact).
 SELECT family, COUNT(*) AS invocations
 FROM (
   SELECT
@@ -107,35 +210,67 @@ GROUP BY family
 ORDER BY invocations DESC;
 
 -- name: work-by-area
--- #317.2 — WORK-BY-AREA: identity from edits. Bucket Edit/Write target file_paths over
--- the repo-scoped Claude session set by the FIRST path segment under the repo root
--- (`internal`, `skills`, `docs`, …). A file_path that is NOT under the repo-root prefix
--- is an edit to an external sibling repo — a reference, not this project's identity — so
--- it buckets as ``. "What this is" (where edits land) is reported separately
--- from "where you stop" (the decision frontier).
+-- #317.2 — WORK-BY-AREA: identity from edits, by LOGICAL area regardless of physical
+-- location. Worktree-based projects (e.g.
a `work-on-issue.sh` that drives the agent IN a
+-- worktree per issue) land their PRODUCT code under `.worktrees/<name>/…` — so a naive
+-- first-segment bucket would mis-file a `.worktrees/<name>/src/x` edit as `.worktrees` and
+-- HIDE the real work. The fix: strip a leading `.worktrees/<name>/` (and `.claude/worktrees/
+-- <name>/`) physical prefix FIRST, then bucket by the next segment — so a worktree `src/`
+-- edit and a main-checkout `src/` edit BOTH count as `src`. The result reflects logical
+-- areas (`src`/`internal`/`docs`/…) wherever the edit physically landed.
+--
+-- A `kind` partition then demotes genuine NON-code paths WITHOUT filtering them (still
+-- counted, honest full accounting): `config` = `.claude` (memory/config/plans), `.beads`
+-- (issue tracker), `.git`, and the external bucket (a sibling repo path outside the repo root —
+-- a reference, not this project). `product` = every logical area under the repo root.
+-- `ORDER BY (kind='config') ASC, edits DESC` puts product areas FIRST even when a config
+-- bucket out-counts them; the report leads with product and footnotes the config sum.
 SELECT
-  CASE
-    WHEN fp LIKE :repo_root || '/%' THEN
-      CASE
-        WHEN instr(substr(fp, length(:repo_root) + 2), '/') > 0
-          THEN substr(substr(fp, length(:repo_root) + 2), 1, instr(substr(fp, length(:repo_root) + 2), '/') - 1)
-        ELSE substr(fp, length(:repo_root) + 2)
-      END
-    ELSE ''
-  END AS area,
+  area,
+  CASE WHEN area IN ('.claude', '.beads', '.git', '') THEN 'config' ELSE 'product' END AS kind,  -- same list as the ORDER BY below
   COUNT(*) AS edits
 FROM (
-  SELECT json_extract(t.input_json, '$.file_path') AS fp
-  FROM tool_calls t
-  JOIN sessions s ON t.session_id = s.id
-  WHERE s.agent = 'claude'
-    AND s.file_path NOT LIKE '%/subagents/%'
-    AND (s.cwd = :repo_root OR s.cwd LIKE :repo_root || '/%')
-    AND t.tool_name IN ('Edit', 'Write')
-    AND json_extract(t.input_json, '$.file_path') IS NOT NULL
+  SELECT
+    -- area: the logical first segment after stripping any worktree physical prefix.
+    CASE
+      WHEN logical IS NULL THEN ''  -- external bucket: fp was not under the repo root
+      WHEN instr(logical, '/') > 0 THEN substr(logical, 1, instr(logical, '/') - 1)  -- text before the first '/'
+      ELSE logical  -- no '/': a file directly at the (logical) root is its own area
+    END AS area
+  FROM (
+    SELECT
+      CASE
+        WHEN fp NOT LIKE :repo_root || '/%' THEN NULL -- external sibling: falls to the external bucket
+        -- strip a leading `.worktrees/<name>/` (drop the first two segments of the relative path)
+        WHEN rel LIKE '.worktrees/%/%' THEN
+          substr(rel, instr(substr(rel, length('.worktrees/') + 1), '/') + length('.worktrees/') + 1)  -- start just past the '/' that ends the segment after the prefix
+        -- strip a leading `.claude/worktrees/<name>/` (drop the first three segments)
+        WHEN rel LIKE '.claude/worktrees/%/%' THEN
+          substr(rel, instr(substr(rel, length('.claude/worktrees/') + 1), '/') + length('.claude/worktrees/') + 1)  -- same offset arithmetic, longer prefix
+        ELSE rel  -- not a worktree path: the relative path is already logical
+      END AS logical
+    FROM (
+      SELECT fp,
+        CASE WHEN fp LIKE :repo_root || '/%'
+             THEN substr(fp, length(:repo_root) + 2) -- path relative to the repo root (+2 skips the root and its '/'; substr is 1-based)
+             ELSE NULL END AS rel
+      FROM (
+        SELECT json_extract(t.input_json, '$.file_path') AS fp  -- the target path of each Edit/Write call
+        FROM tool_calls t
+        JOIN sessions s ON t.session_id = s.id
+        WHERE s.agent = 'claude'
+          AND s.file_path NOT LIKE '%/subagents/%'  -- drop sessions whose file_path contains /subagents/
+          AND (s.cwd = :repo_root OR s.cwd LIKE :repo_root || '/%')  -- session cwd at or under the repo root
+          AND t.tool_name IN ('Edit', 'Write')
+          AND json_extract(t.input_json, '$.file_path') IS NOT NULL  -- calls without a file_path are not counted
+      )
+    )
+  )
 )
 GROUP BY area
-ORDER BY edits DESC;
+ORDER BY
+  (area IN ('.claude', '.beads', '.git', '')) ASC, -- config demotes, product leads
+  edits DESC;
 
 -- name: decision-open
 -- #320 — the decision/OPEN frontier (raw transcript scan; the repo cross-check is
@@ -165,3 +300,105 @@ WHERE s.agent = 'claude'
   AND t.tool_name IN ('AskUserQuestion', 'ExitPlanMode')
 ORDER BY status ASC, t.id DESC
 LIMIT 20;
+
+-- name: mode-classification
+-- #324 (G) — classify each TRACK into a work MODE, so the report can make the RIGHT
+-- commission offer per track (automation for mechanical, book-keeping for exploration)
+-- instead of one undifferentiated pitch.
The track key is the session's `git_branch`:
+-- worktree-based projects (a `work-on-issue.sh` that branches per issue) carry one branch
+-- per track, and content/design exploration runs on its own branch(es) too — so branch is
+-- the per-track key the survey already has. Sessions with a blank branch are not a track
+-- and drop out (they fold into the generic report, never a guessed mode).
+--
+-- Per track, tally the signatures the survey already reads (all repo-scoped, subagent-free):
+--   veto     — `[Request interrupted` / `doesn't want to proceed` markers in the messages
+--   loop     — `worktree` / `work-on-issue` markers (the mechanical issue→worktree→PR loop)
+--   passed   — answered/approved AskUserQuestion/ExitPlanMode decisions (gate-pass)
+--   rejected — the user-doesn't-want-to-proceed decisions (gate-fail / cancelled path)
+--   code     — Edit/Write to a code file (`.go`/`.ts`/`.tsx`/`.py`/`.rs`/`.js`)
+--   prose    — Edit/Write to a `.md` content/doc file
+-- Score the two signatures and label by the DOMINANT one with a MARGIN guard:
+--   mechanical signature  = loop present + gate-pass-dominant + zero veto + code-heavy
+--   exploration signature = veto present + a rejected/cancelled path + prose-heavy
+-- A label is assigned ONLY when one score beats the other by >= 2 (a clear dominance); a
+-- track with neither clearly dominant stays `unlabeled` and the report gives it the generic
+-- book-keeping offer — NEVER a guessed automation pitch (the asymmetry favors not
+-- mis-offering: a missed automation offer is a cheap omission; a wrong automation pitch at
+-- creative work is the misread to avoid). The report reads `mode` per track to pick the offer.
+WITH track_sessions AS (  -- one row per in-scope session, keyed by its branch (the track)
+  SELECT s.id, s.git_branch AS track
+  FROM sessions s
+  WHERE s.agent = 'claude'
+    AND s.git_branch <> ''  -- blank branch drops out; a NULL branch also fails this comparison
+    AND s.file_path NOT LIKE '%/subagents/%'  -- drop sessions whose file_path contains /subagents/
+    AND (s.cwd = :repo_root OR s.cwd LIKE :repo_root || '/%')  -- session cwd at or under the repo root
+),
+vetoes AS (  -- n = matching MESSAGE rows per track (not sessions)
+  SELECT ts.track, COUNT(*) AS n
+  FROM track_sessions ts JOIN messages m ON m.session_id = ts.id
+  WHERE m.content LIKE '%[Request interrupted%'
+     OR m.content LIKE '%doesn''t want to proceed%'
+  GROUP BY ts.track
+),
+loops AS (  -- n = message rows per track that mention either loop marker
+  SELECT ts.track, COUNT(*) AS n
+  FROM track_sessions ts JOIN messages m ON m.session_id = ts.id
+  WHERE m.content LIKE '%worktree%' OR m.content LIKE '%work-on-issue%'
+  GROUP BY ts.track
+),
+passed AS (  -- n = decision tool calls whose result STARTS WITH an answered/approved phrase
+  SELECT ts.track, COUNT(*) AS n
+  FROM track_sessions ts JOIN tool_calls t ON t.session_id = ts.id
+  WHERE t.tool_name IN ('AskUserQuestion', 'ExitPlanMode')
+    AND (t.result_content LIKE 'Your questions have been answered%'
+         OR t.result_content LIKE 'Your question has been answered%'
+         OR t.result_content LIKE 'User has answered%'
+         OR t.result_content LIKE 'User has approved your plan%'
+         OR t.result_content LIKE 'User approved%')
+  GROUP BY ts.track
+),
+rejected AS (  -- n = decision tool calls whose result CONTAINS the refusal phrase anywhere
+  SELECT ts.track, COUNT(*) AS n
+  FROM track_sessions ts JOIN tool_calls t ON t.session_id = ts.id
+  WHERE t.tool_name IN ('AskUserQuestion', 'ExitPlanMode')
+    AND t.result_content LIKE '%doesn''t want to proceed%'
+  GROUP BY ts.track
+),
+edits AS (  -- per track: Edit/Write calls split by the file_path's extension
+  SELECT ts.track,
+         SUM(CASE WHEN fp LIKE '%.md' THEN 1 ELSE 0 END) AS prose,
+         SUM(CASE WHEN fp LIKE '%.go' OR fp LIKE '%.ts' OR fp LIKE '%.tsx'
+                  OR fp LIKE '%.py' OR fp LIKE '%.rs' OR fp LIKE '%.js' THEN 1 ELSE 0 END) AS code  -- any other extension (or a NULL fp) counts toward neither
+  FROM track_sessions ts
+  JOIN (
+    SELECT session_id, json_extract(input_json, '$.file_path') AS fp
+    FROM tool_calls WHERE tool_name IN ('Edit', 'Write')
+  ) e ON e.session_id = ts.id
+  GROUP BY ts.track
+),
+sig AS (  -- one row per track with all six tallies; a track missing from a tally gets 0
+  SELECT t.track,
+    COALESCE(v.n, 0) AS veto, COALESCE(l.n, 0) AS loop,
+    COALESCE(p.n, 0) AS passed, COALESCE(r.n, 0) AS rejected,
+    COALESCE(e.code, 0) AS code, COALESCE(e.prose, 0) AS prose
+  FROM (SELECT DISTINCT track FROM track_sessions) t  -- driving set: every track, even one with no signals
+  LEFT JOIN vetoes v ON v.track = t.track
+  LEFT JOIN loops l ON l.track = t.track
+  LEFT JOIN passed p ON p.track = t.track
+  LEFT JOIN rejected r ON r.track = t.track
+  LEFT JOIN edits e ON e.track = t.track
+),
+scored AS (  -- each comparison evaluates to 0 or 1, so a score is the count of conditions met
+  SELECT *,
+    ((loop > 0) + (passed > rejected) + (veto = 0) + (code > prose)) AS mech,  -- four conditions: 0..4
+    ((veto > 0) + (rejected > 0) + (prose > code)) AS expl  -- three conditions: 0..3
+  FROM sig
+)
+SELECT track,
+  CASE
+    WHEN mech - expl >= 2 THEN 'mechanical'  -- margin guard: must lead by at least 2
+    WHEN expl - mech >= 2 THEN 'exploration'
+    ELSE 'unlabeled'  -- no clear dominance
+  END AS mode
+FROM scored
+ORDER BY track;