From c8f15c7443bbbe8c30aa914c1787c95015e3884b Mon Sep 17 00:00:00 2001 From: CL Kao Date: Sat, 13 Jun 2026 00:28:29 -0700 Subject: [PATCH 1/5] survey: add dispatch-fact + decision-no-followup queries, knowledge-work mode, manual rename Query-level ACs (AC-1b/AC-3/AC-5/R6#1) for the survey output redesign: - dispatch-fact (za): count distinct in-repo parents that dispatched subagents + total dispatched, via relationship_type='subagent' + parent_session_id parent-join, scoped to the body's exact parent scope. - decision-no-followup (9h): count `done` decisions with no later Edit/Write in the same session, ordered by the real tool_calls.message_id -> messages.ordinal chronological join (not tool_calls.id insertion order). - mode-classification: rename 'mechanical' -> 'manual' for repetitive-but-substantive tracks; add the knowledge-work branch (intake/process/file/log/close loop + content/ops edits, gated on the loop marker, scored beside manual/exploration with the margin guard). - Extend testdata/survey/fixture-sessions.sql: tool_calls.message_id + messages.ordinal, sessions.parent_session_id + relationship_type; seed dispatch (2|3), no-followup (1), knowledge-work (notes-ops), all-unlabeled-Codex mutation. Each expected value is a fixture-derived oracle with a non-vacuous mutation (higher-ordinal Edit decrements no-followup; out-of-repo subagent re-point flips dispatch; kloop-strip drops knowledge-work). 14/14 TestSurveyQuerySmoke subtests pass; 26/26 integration package. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- skills/integration/survey_queries_test.go | 154 ++++++++++-- .../testdata/survey/fixture-sessions.sql | 230 +++++++++++++++--- skills/survey/references/queries.sql | 126 ++++++++-- 3 files changed, 443 insertions(+), 67 deletions(-) diff --git a/skills/integration/survey_queries_test.go b/skills/integration/survey_queries_test.go index aa6a7c39..4ddd9c6d 100644 --- a/skills/integration/survey_queries_test.go +++ b/skills/integration/survey_queries_test.go @@ -151,6 +151,7 @@ func TestSurveyQuerySmoke(t *testing.T) { for _, name := range []string{ "scoping", "codex-presence", "codex-scoped", "codex-workstreams", "codex-activity", "scaffold-usage", "work-by-area", "decision-open", "mode-classification", + "dispatch-fact", "decision-no-followup", } { if _, ok := queries[name]; !ok { t.Fatalf("recommended-SQL reference is missing the %q query (have: %v)", name, sortedQueryNames(queries)) @@ -161,8 +162,10 @@ func TestSurveyQuerySmoke(t *testing.T) { // shares ONE `project` key; the cwd-prefix-union does the load-bearing work — it counts // the in-repo Claude sessions (cwd AT root, subdir, worktree, plus the F/G worktree-shape // + the mode-classification track sessions, all under the prefix) and EXCLUDES the - // blank-cwd session, the out-of-repo session, and ALL the codex rows. The fixture has 9 - // in-repo Claude sessions: A,B,C + WT + issue-feed×2 + landing-copy×2 + mixed-bag. + // blank-cwd session, the out-of-repo session, the dispatched SUBAGENT sessions (file_path + // under %/subagents/%), and ALL the codex rows. The fixture has 15 in-repo non-subagent + // Claude sessions: A,B,C + WT + issue-feed×2 + landing-copy×2 + mixed-bag (9) + the two + // dispatch parents + two decision-no-followup + two knowledge-work `notes-ops` (6). 
t.Run("scoping", func(t *testing.T) { rows := runQuery(t, db, queries["scoping"]) if len(rows) != 1 { @@ -172,8 +175,8 @@ func TestSurveyQuerySmoke(t *testing.T) { if len(fields) != 3 { t.Fatalf("scoping row should have 3 fields (sessions|blank_cwd|span) — folded_keys is dropped, got: %q", rows[0]) } - if fields[0] != "9" { - t.Errorf("the cwd-prefix should count 9 in-repo Claude sessions, got sessions=%q", fields[0]) + if fields[0] != "15" { + t.Errorf("the cwd-prefix should count 15 in-repo non-subagent Claude sessions, got sessions=%q", fields[0]) } if fields[1] != "0" { t.Errorf("the blank-cwd Claude session is outside the prefix and must not count, got blank_cwd=%q", fields[1]) @@ -299,15 +302,15 @@ func TestSurveyQuerySmoke(t *testing.T) { }) // no-union (AC-2c): the added Codex rows must NOT inflate the Claude scope. The scoping - // query is asserted to 9 above (the Claude-only in-repo count), proving Codex stays out + // query is asserted to 15 above (the Claude-only in-repo count), proving Codex stays out // of the Claude `sessions` count — a flagged presence, never a silent project union. t.Run("codex-not-folded-into-scope", func(t *testing.T) { rows := runQuery(t, db, queries["scoping"]) if len(rows) != 1 { t.Fatalf("scoping should return one summary row, got %d: %v", len(rows), rows) } - if sessions := strings.Split(rows[0], "|")[0]; sessions != "9" { - t.Errorf("the Codex rows must not be folded into the Claude scope; scoping.sessions should stay 9, got %q", sessions) + if sessions := strings.Split(rows[0], "|")[0]; sessions != "15" { + t.Errorf("the Codex rows must not be folded into the Claude scope; scoping.sessions should stay 15, got %q", sessions) } }) @@ -366,8 +369,10 @@ func TestSurveyQuerySmoke(t *testing.T) { t.Errorf("a worktree edit must NOT bucket as `.worktrees` — the physical prefix must be stripped; got %v", edits) } // `.claude/worktrees//internal/codex.go` strips to `internal` (the second worktree layout). 
- if edits["internal"] != "4" { - t.Errorf("internal should count 4 (build.go, parse.go, index.go, the .claude/worktrees-stripped codex.go), got %q in %v", edits["internal"], edits) + // internal counts 6: build.go, parse.go, index.go, the .claude/worktrees-stripped codex.go, + // plus the two decision-no-followup `internal/cache/` edits (warm.go, impl.go). + if edits["internal"] != "6" { + t.Errorf("internal should count 6 (build.go, parse.go, index.go, .claude/worktrees-stripped codex.go, + 2 decision-no-followup cache edits), got %q in %v", edits["internal"], edits) } // genuine config demotes to kind=config (still counted), NOT filtered. for _, c := range []string{".claude", ".beads", ""} { @@ -399,14 +404,19 @@ func TestSurveyQuerySmoke(t *testing.T) { } }) - // mode-classification (#324, G / AC-8a): classify each TRACK (keyed by git_branch) into a - // work MODE from the per-track signal tallies (veto density, gate-pass ratio, loop markers, - // edit-kind). The fixture carries a MECHANICAL track (issue-feed: gate-pass, worktree loop, - // code edits, no veto), an EXPLORATION track (landing-copy: vetoes, a rejected path, .md - // edits), and a NEITHER-DOMINANT track (mixed-bag → unlabeled). The labels DERIVE from the + // mode-classification (#324, G / AC-6#2 vocab + AC-5 archetype): classify each TRACK (keyed + // by git_branch) into a work MODE from the per-track signal tallies (veto density, gate-pass + // ratio, loop markers, edit-kind). The fixture carries a MANUAL track (issue-feed: gate-pass, + // worktree loop, code edits, no veto — the repetitive-but-substantive drive loop, the label + // renamed from `mechanical`), an EXPLORATION track (landing-copy: vetoes, a rejected path, + // .md edits), a KNOWLEDGE-WORK track (notes-ops: intake→process→file→log→close markers + + // content/ops `.md`+`.json` edits + a gate-pass batch confirm + zero veto + no issue→PR + // loop), and a NEITHER-DOMINANT track (mixed-bag → unlabeled). 
The labels DERIVE from the // signal rows (the independent oracle), never from SKILL.md text. Non-vacuous: (i) swapping - // the mechanical track's rows to carry high vetoes + a rejected path + prose flips its label - // to exploration; (ii) the neither-dominant track stays unlabeled (no guessed automation). + // the manual track's rows to carry high vetoes + a rejected path + prose flips its label + // to exploration; (ii) the neither-dominant track stays unlabeled; (iii) stripping the + // knowledge-work loop markers drops notes-ops back to unlabeled (the kloop marker is the + // load-bearing gate of the knowledge-work score). t.Run("mode-classification", func(t *testing.T) { rows := runQuery(t, db, queries["mode-classification"]) mode := map[string]string{} @@ -417,15 +427,25 @@ func TestSurveyQuerySmoke(t *testing.T) { } mode[f[0]] = f[1] } - if mode["issue-feed"] != "mechanical" { - t.Errorf("the gate-pass/worktree-loop/code track should classify mechanical, got %q in %v", mode["issue-feed"], mode) + if mode["issue-feed"] != "manual" { + t.Errorf("the gate-pass/worktree-loop/code track should classify manual (the `mechanical`→`manual` rename), got %q in %v", mode["issue-feed"], mode) } if mode["landing-copy"] != "exploration" { t.Errorf("the high-veto/rejected/prose track should classify exploration, got %q in %v", mode["landing-copy"], mode) } + if mode["notes-ops"] != "knowledge-work" { + t.Errorf("the intake→process→file→log→close + content/ops-edits track should classify knowledge-work, got %q in %v", mode["notes-ops"], mode) + } if mode["mixed-bag"] != "unlabeled" { t.Errorf("a neither-dominant track must stay unlabeled (generic book-keeping, never a guessed automation pitch), got %q in %v", mode["mixed-bag"], mode) } + // the `mechanical` label must NOT appear — it was renamed to `manual` everywhere the + // classifier emits it (reserved for genuinely-trivial tracks the classifier doesn't detect). 
+ for track, m := range mode { + if m == "mechanical" { + t.Errorf("no track may classify `mechanical` after the rename; track %q is mechanical in %v", track, mode) + } + } // non-vacuous (i): swap issue-feed's signals (high veto + rejected path + prose) → flips to exploration. db2 := buildFixtureDB(t) execSQLite(t, db2, `UPDATE messages SET content='[Request interrupted by user]' WHERE session_id='claude:91111111-1111-1111-1111-111111111111';`) @@ -439,11 +459,23 @@ func TestSurveyQuerySmoke(t *testing.T) { flipped[f[0]] = f[1] } if flipped["issue-feed"] != "exploration" { - t.Errorf("swapping the mechanical track's signals to the exploration signature must flip its label, got %q in %v", flipped["issue-feed"], flipped) + t.Errorf("swapping the manual track's signals to the exploration signature must flip its label, got %q in %v", flipped["issue-feed"], flipped) } if flipped["mixed-bag"] != "unlabeled" { t.Errorf("the neither-dominant track must stay unlabeled under the signal swap, got %q in %v", flipped["mixed-bag"], flipped) } + // non-vacuous (iii): strip the knowledge-work loop markers → notes-ops drops to unlabeled, + // proving the intake→process→file→log→close marker gates the knowledge-work score. 
+ db3 := buildFixtureDB(t) + execSQLite(t, db3, `UPDATE messages SET content='ordinary work' WHERE session_id IN ('claude:c6111111-0000-0000-0000-000000000001','claude:c6222222-0000-0000-0000-000000000002');`) + stripped := map[string]string{} + for _, r := range runQuery(t, db3, queries["mode-classification"]) { + f := strings.Split(r, "|") + stripped[f[0]] = f[1] + } + if stripped["notes-ops"] != "unlabeled" { + t.Errorf("stripping the knowledge-work loop markers must drop notes-ops to unlabeled, got %q in %v", stripped["notes-ops"], stripped) + } }) // decision-open (#320): the rejected AskUserQuestion is OPEN; the ExitPlanMode @@ -476,6 +508,90 @@ func TestSurveyQuerySmoke(t *testing.T) { t.Errorf("the OPEN frontier must sort first so the LIMIT cannot hide it, got leading row: %q", rows[0]) } }) + + // dispatch-fact (#za, AC-3): count orchestration over the body's exact parent scope — + // a subagent (relationship_type='subagent') counts only when its PARENT is an in-repo, + // non-subagent Claude session. The fixture seeds two in-repo parents (P1→2 subagents, + // P2→1 ⇒ distinct=2, total=3) and one subagent of the OUT-of-repo session E (must NOT + // count). The expected 2|3 derives from the seeded rows. Non-vacuous: re-pointing the + // out-of-repo subagent's parent to an in-repo parent flips the counts 2|3→3|4, proving + // the parent-scope filter is load-bearing, not a constant. 
+ t.Run("dispatch-fact", func(t *testing.T) { + rows := runQuery(t, db, queries["dispatch-fact"]) + if len(rows) != 1 { + t.Fatalf("dispatch-fact should return one summary row, got %d: %v", len(rows), rows) + } + fields := strings.Split(rows[0], "|") + if len(fields) != 2 { + t.Fatalf("dispatch-fact row should have 2 fields (sessions_that_orchestrated|subagents_dispatched), got: %q", rows[0]) + } + if fields[0] != "2" { + t.Errorf("dispatch-fact should count 2 distinct in-repo orchestrating parents (E's out-of-repo parent excluded), got %q", fields[0]) + } + if fields[1] != "3" { + t.Errorf("dispatch-fact should count 3 in-repo-parented subagents (E's subagent excluded), got %q", fields[1]) + } + // non-vacuous: re-point the out-of-repo subagent's parent to an in-repo parent → 2|3 becomes 3|4. + db2 := buildFixtureDB(t) + execSQLite(t, db2, `UPDATE sessions SET parent_session_id='claude:aaaaaaaa-1111-2222-3333-444444444444' WHERE id='claude:e4444444-0000-0000-0000-000000000004';`) + flipped := strings.Split(runQuery(t, db2, queries["dispatch-fact"])[0], "|") + if flipped[0] != "3" || flipped[1] != "4" { + t.Errorf("re-pointing the out-of-repo subagent to an in-repo parent must flip dispatch-fact 2|3→3|4 (parent-scope is load-bearing), got %s|%s", flipped[0], flipped[1]) + } + }) + + // decision-no-followup (#9h, AC-1b): count `done` decisions with NO Edit/Write at a higher + // message ordinal in the same session. "Later" is the REAL chronological order via + // tool_calls.message_id → messages.id → messages.ordinal — NOT tool_calls.id insertion + // order. The fixture seeds NF1 (a done decision at ordinal 2 with an Edit at ordinal 1 + // BEFORE it → counts) and NF2 (a done decision at ordinal 2 with a Write at ordinal 3 + // AFTER it → does not). The existing decision tool_calls carry NULL message_id so they do + // not join — the oracle is exactly 1. 
Non-vacuous: inserting an Edit at a HIGHER ordinal + // than NF1's decision decrements the count 1→0, proving the ordinal compare (not insertion + // order: NF1's qualifying Edit has the LOWER tool_calls.id) is load-bearing. + t.Run("decision-no-followup", func(t *testing.T) { + rows := runQuery(t, db, queries["decision-no-followup"]) + if len(rows) != 1 { + t.Fatalf("decision-no-followup should return one count row, got %d: %v", len(rows), rows) + } + if rows[0] != "1" { + t.Errorf("decision-no-followup should count 1 (NF1 has no later edit; NF2 does), got %q", rows[0]) + } + // non-vacuous: insert an Edit at ordinal 3 in NF1 (higher than its decision's ordinal 2) + // → the count drops 1→0. The Edit's tool_calls.id is the highest, so a wrong insertion-order + // join would behave differently — proving the message_id→ordinal chronological join is load-bearing. + db2 := buildFixtureDB(t) + execSQLite(t, db2, `INSERT INTO messages (id,session_id,ordinal,role,content) VALUES (199,'claude:f0111111-0000-0000-0000-000000000001',3,'assistant','late edit');`) + execSQLite(t, db2, `INSERT INTO tool_calls (id,session_id,message_id,tool_name,input_json) VALUES (199,'claude:f0111111-0000-0000-0000-000000000001','199','Edit','{"file_path":"/repo/proj/internal/cache/late.go"}');`) + flipped := runQuery(t, db2, queries["decision-no-followup"]) + if flipped[0] != "0" { + t.Errorf("inserting an Edit at a higher ordinal than NF1's decision must drop decision-no-followup 1→0 (chronological join is load-bearing), got %q", flipped[0]) + } + }) + + // codex-workstreams all-unlabeled collapse (#h5, AC-4a query side): when every Codex-scoped + // session is unclassifiable, codex-workstreams returns ONLY `(unlabeled)` rows — the shape + // the render then collapses to a single honest "N sessions, unclassified" line instead of a + // (unlabeled)-only breakdown. 
The BASE fixture has ≥2 NAMED clusters (the conditional + // contrast: the render KEEPS the breakdown), already asserted in the codex-workstreams + // sub-test above. Here we mutate the named first_messages to encouragement/meta and assert + // the query collapses to a single all-(unlabeled) row — proving the collapse is conditional + // on the all-unlabeled shape, not a blanket removal. + t.Run("codex-workstreams-all-unlabeled", func(t *testing.T) { + db2 := buildFixtureDB(t) + execSQLite(t, db2, `UPDATE sessions SET first_message='You totally got this. Keep going, friend.' WHERE id IN ('codex:ffffffff-8888-9999-aaaa-bbbbbbbbbbbb','codex:f2f2f2f2-0000-1111-2222-333333333333','codex:f4f4f4f4-8888-9999-aaaa-bbbbbbbbbbbb');`) + rows := runQuery(t, db2, queries["codex-workstreams"]) + if len(rows) != 1 { + t.Fatalf("an all-unlabeled Codex set must collapse to ONE codex-workstreams row, got %d: %v", len(rows), rows) + } + f := strings.Split(rows[0], "|") + if f[0] != "(unlabeled)" { + t.Errorf("the single collapsed row must be (unlabeled), got %q", f[0]) + } + if f[1] != "4" { + t.Errorf("all 4 codex-scoped sessions should cluster into the single (unlabeled) row, got count %q", f[1]) + } + }) } // sortedQueryNames returns the labeled query names for a diagnostic message. diff --git a/skills/integration/testdata/survey/fixture-sessions.sql b/skills/integration/testdata/survey/fixture-sessions.sql index 7c993130..8dffa741 100644 --- a/skills/integration/testdata/survey/fixture-sessions.sql +++ b/skills/integration/testdata/survey/fixture-sessions.sql @@ -58,12 +58,21 @@ CREATE TABLE sessions ( ended_at TEXT, first_message TEXT, message_count INTEGER, - user_message_count INTEGER + user_message_count INTEGER, + -- parent_session_id + relationship_type are production-shaped (agentsview v0.32.1): a + -- dispatched subagent carries relationship_type='subagent' and links back to its dispatcher + -- via parent_session_id. 
The dispatch-fact query (#za) joins these to count orchestration. + parent_session_id TEXT, + relationship_type TEXT NOT NULL DEFAULT '' ); CREATE TABLE tool_calls ( id INTEGER PRIMARY KEY, session_id TEXT, + -- message_id links a tool call to the message it was emitted from (agentsview v0.32.1). + -- The decision-no-followup query (#9h) joins message_id → messages.id → messages.ordinal + -- to compare decisions and Edits by REAL chronological order, not tool_calls.id insertion order. + message_id TEXT, tool_name TEXT, category TEXT NOT NULL DEFAULT '', skill_name TEXT, @@ -74,6 +83,9 @@ CREATE TABLE tool_calls ( CREATE TABLE messages ( id INTEGER PRIMARY KEY, session_id TEXT, + -- ordinal is the message's chronological position within its session (NOT NULL in + -- agentsview v0.32.1). The decision-no-followup query orders decisions and Edits by it. + ordinal INTEGER, role TEXT, content TEXT ); @@ -92,7 +104,8 @@ INSERT INTO sessions VALUES ('claude:aaaaaaaa-1111-2222-3333-444444444444', 'proj', 'claude', '/repo/proj', 'main', '/u/.claude/projects/-repo-proj/aaaaaaaa.jsonl', - '2026-06-05', '2026-06-05', 'Pick up the parser refactor and ship it.', 8, 3); + '2026-06-05', '2026-06-05', 'Pick up the parser refactor and ship it.', 8, 3, + NULL, ''); -- Claude session B — a SUBDIR checkout (the split-root state dir). agentsview keys it by -- the git-root basename, so its `project` is `proj`, the SAME key as the root — the @@ -101,7 +114,8 @@ INSERT INTO sessions VALUES ('claude:bbbbbbbb-5555-6666-7777-888888888888', 'proj', 'claude', '/repo/proj/docs/dev/.spacedock-state', 'main', '/u/.claude/projects/-repo-proj-docs-dev-_spacedock_state/bbbbbbbb.jsonl', - '2026-06-06', '2026-06-06', 'Now wire up the regression suite.', 6, 2); + '2026-06-06', '2026-06-06', 'Now wire up the regression suite.', 6, 2, + NULL, ''); -- Claude session C — a WORKTREE-style checkout. Same git-root basename, so `project` is -- again `proj` — placed in scope by the cwd-prefix, not a distinct key. 
Carries the @@ -111,7 +125,8 @@ INSERT INTO sessions VALUES ('claude:cccccccc-9999-aaaa-bbbb-cccccccccccc', 'proj', 'claude', '/repo/proj/.worktrees/feature-x', 'feature-x', '/u/.claude/projects/-repo-proj-.worktrees-feature-x/cccccccc.jsonl', - '2026-06-06', '2026-06-06', 'Build the feature behind a worktree.', 5, 2); + '2026-06-06', '2026-06-06', 'Build the feature behind a worktree.', 5, 2, + NULL, ''); -- Claude session D — a BLANK cwd (production stores blank as ''). Under no prefix; it -- must NOT count toward the repo scope, and the blank-cwd tally surfaces it. @@ -119,7 +134,8 @@ INSERT INTO sessions VALUES ('claude:dddddddd-0000-1111-2222-333333333333', '', 'claude', '', '', '/u/.claude/projects/unknown/dddddddd.jsonl', - '2026-06-03', '2026-06-03', 'A session whose cwd agentsview never captured.', 3, 1); + '2026-06-03', '2026-06-03', 'A session whose cwd agentsview never captured.', 3, 1, + NULL, ''); -- Claude session E — OUTSIDE the repo prefix entirely (a different project on the same -- machine). The scoping query MUST exclude it; its Skill row MUST NOT inflate the tally. 
@@ -127,7 +143,8 @@ INSERT INTO sessions VALUES ('claude:eeeeeeee-4444-5555-6666-777777777777', 'otherproj', 'claude', '/elsewhere/otherproj', 'main', '/u/.claude/projects/-elsewhere-otherproj/eeeeeeee.jsonl', - '2026-06-02', '2026-06-02', 'Unrelated project that shares the machine.', 4, 1); + '2026-06-02', '2026-06-02', 'Unrelated project that shares the machine.', 4, 1, + NULL, ''); -- ============================================================================ -- CODEX SESSIONS — four attributed to THIS repo (F, F2, F3, F4) + one same-basename @@ -151,7 +168,7 @@ INSERT INTO sessions VALUES '/u/.codex/sessions/rollout-ffffffff.jsonl', '2026-06-04', '2026-06-04', 'Read /tmp/spacedock-dispatch/spacedock-ensign-journey-cost-ledger-implementation.md and treat its content as your assignment.', - 4, 1); + 4, 1, NULL, ''); -- Codex F2 — TASK/ENTITY backtick pattern → workstream `orient-workflow-discovery`. A -- leading reviewer-label backtick precedes the keyword so the rule must anchor on the @@ -162,7 +179,7 @@ INSERT INTO sessions VALUES '/u/.codex/sessions/rollout-f2f2f2f2.jsonl', '2026-06-04', '2026-06-04', 'You are `142-validation/Ensign`, a fresh validation worker for Spacedock entity 142 `orient-workflow-discovery`. Working directory: /repo/proj.', - 4, 1); + 4, 1, NULL, ''); -- Codex F3 — UNLABELED. An encouragement/meta first_message carries no task → (unlabeled). INSERT INTO sessions VALUES @@ -171,7 +188,7 @@ INSERT INTO sessions VALUES '/u/.codex/sessions/rollout-f3f3f3f3.jsonl', '2026-06-04', '2026-06-04', 'You totally got this. Take your time. Captain asked me to tell subagents they are appreciated.', - 3, 1); + 3, 1, NULL, ''); -- Codex F4 — a SECOND distinct DISPATCH task → workstream `codex-live-ci`. Proves the -- cluster key is the extracted {TASK}, not a constant: F4 must NOT merge with F. 
@@ -181,7 +198,7 @@ INSERT INTO sessions VALUES '/u/.codex/sessions/rollout-f4f4f4f4.jsonl', '2026-06-04', '2026-06-04', 'Read /tmp/spacedock-dispatch/spacedock-ensign-codex-live-ci-validation.md and treat its content as your assignment.', - 4, 1); + 4, 1, NULL, ''); -- Codex session G — a SAME-BASENAME SIBLING repo's Codex history. Its git-root basename is -- also `proj`, so it keys to the identical `project` and codex-presence CANNOT distinguish @@ -194,7 +211,7 @@ INSERT INTO sessions VALUES '/u/.codex/sessions/rollout-11111111.jsonl', '2026-06-04', '2026-06-04', 'Read /tmp/spacedock-dispatch/spacedock-ensign-sibling-task-implementation.md and treat its content as your assignment.', - 3, 1); + 3, 1, NULL, ''); -- ---------------------------------------------------------------------------- -- CODEX exec_command rows carry $.workdir — the attribution signal (#321 codex-scoped) and @@ -292,7 +309,8 @@ INSERT INTO sessions VALUES ('claude:77777777-aaaa-bbbb-cccc-dddddddddddd', 'proj', 'claude', '/repo/proj/.worktrees/issue-42', 'issue-42', '/u/.claude/projects/-repo-proj-.worktrees-issue-42/77777777.jsonl', - '2026-06-07', '2026-06-07', 'Run the work-on-issue loop for issue 42 in its worktree.', 6, 2); + '2026-06-07', '2026-06-07', 'Run the work-on-issue loop for issue 42 in its worktree.', 6, 2, + NULL, ''); -- Worktree-attribution Edit/Write rows: two worktree `src/` edits (strip to `src`), a -- main-checkout `src/` edit (also `src` — all three bucket together), a `docs/` product @@ -340,11 +358,13 @@ INSERT INTO sessions VALUES ('claude:91111111-1111-1111-1111-111111111111', 'proj', 'claude', '/repo/proj', 'issue-feed', '/u/.claude/projects/-repo-proj/91111111.jsonl', - '2026-06-07', '2026-06-07', 'Drive the issue-feed renderer via the work-on-issue loop.', 5, 2), + '2026-06-07', '2026-06-07', 'Drive the issue-feed renderer via the work-on-issue loop.', 5, 2, + NULL, ''), ('claude:92222222-2222-2222-2222-222222222222', 'proj', 'claude', 
'/repo/proj/.worktrees/issue-feed', 'issue-feed', '/u/.claude/projects/-repo-proj-.worktrees-issue-feed/92222222.jsonl', - '2026-06-07', '2026-06-07', 'Continue the issue-feed worktree implementation.', 4, 2); + '2026-06-07', '2026-06-07', 'Continue the issue-feed worktree implementation.', 4, 2, + NULL, ''); INSERT INTO tool_calls (id, session_id, tool_name, input_json, result_content) VALUES (60, 'claude:91111111-1111-1111-1111-111111111111', 'AskUserQuestion', '{"questions":[{"header":"Reindex strategy","question":"Incremental vs full reindex?"}]}', @@ -359,11 +379,13 @@ INSERT INTO sessions VALUES ('claude:a3333333-3333-3333-3333-333333333333', 'proj', 'claude', '/repo/proj', 'landing-copy', '/u/.claude/projects/-repo-proj/a3333333.jsonl', - '2026-06-07', '2026-06-07', 'Draft the landing hero copy; try a few framings.', 7, 4), + '2026-06-07', '2026-06-07', 'Draft the landing hero copy; try a few framings.', 7, 4, + NULL, ''), ('claude:a4444444-4444-4444-4444-444444444444', 'proj', 'claude', '/repo/proj', 'landing-copy', '/u/.claude/projects/-repo-proj/a4444444.jsonl', - '2026-06-07', '2026-06-07', 'Rework the story section; the last direction was wrong.', 6, 3); + '2026-06-07', '2026-06-07', 'Rework the story section; the last direction was wrong.', 6, 3, + NULL, ''); INSERT INTO tool_calls (id, session_id, tool_name, input_json, result_content) VALUES (63, 'claude:a3333333-3333-3333-3333-333333333333', 'AskUserQuestion', '{"questions":[{"header":"Hero framing","question":"Hero-vs-story framing?"}]}', @@ -378,7 +400,8 @@ INSERT INTO sessions VALUES ('claude:b5555555-5555-5555-5555-555555555555', 'proj', 'claude', '/repo/proj', 'mixed-bag', '/u/.claude/projects/-repo-proj/b5555555.jsonl', - '2026-06-07', '2026-06-07', 'Some odds and ends across the repo.', 4, 2); + '2026-06-07', '2026-06-07', 'Some odds and ends across the repo.', 4, 2, + NULL, ''); INSERT INTO tool_calls (id, session_id, tool_name, input_json, result_content) VALUES (66, 
'claude:b5555555-5555-5555-5555-555555555555', 'AskUserQuestion', '{"questions":[{"header":"Odds and ends","question":"Which loose end first?"}]}', @@ -390,20 +413,167 @@ INSERT INTO tool_calls (id, session_id, tool_name, input_json, result_content) V -- Veto + loop markers in the message stream (interruption + mechanical-loop signals, -- prose-read). Session B carries the original veto; the G tracks carry their signatures. -- ---------------------------------------------------------------------------- -INSERT INTO messages VALUES - (1, 'claude:aaaaaaaa-1111-2222-3333-444444444444', 'user', 'Pick up the parser refactor and ship it.'), - (2, 'claude:bbbbbbbb-5555-6666-7777-888888888888', 'user', 'Now wire up the regression suite.'), - (3, 'claude:bbbbbbbb-5555-6666-7777-888888888888', 'user', '[Request interrupted by user]'), - (4, 'claude:cccccccc-9999-aaaa-bbbb-cccccccccccc', 'user', 'Build the feature behind a worktree.'), - (5, 'codex:ffffffff-8888-9999-aaaa-bbbbbbbbbbbb', 'user', 'A codex session in this repo; cwd unrecorded.'), +-- The ordinal column carries each message's chronological position within its session +-- (production: NOT NULL). These mode-classification/veto messages read only `content`; the +-- ordinal-load-bearing rows live in the DECISION-NO-FOLLOWUP block below. +INSERT INTO messages (id, session_id, ordinal, role, content) VALUES + (1, 'claude:aaaaaaaa-1111-2222-3333-444444444444', 1, 'user', 'Pick up the parser refactor and ship it.'), + (2, 'claude:bbbbbbbb-5555-6666-7777-888888888888', 1, 'user', 'Now wire up the regression suite.'), + (3, 'claude:bbbbbbbb-5555-6666-7777-888888888888', 2, 'user', '[Request interrupted by user]'), + (4, 'claude:cccccccc-9999-aaaa-bbbb-cccccccccccc', 1, 'user', 'Build the feature behind a worktree.'), + (5, 'codex:ffffffff-8888-9999-aaaa-bbbbbbbbbbbb', 1, 'user', 'A codex session in this repo; cwd unrecorded.'), -- WT (issue-42): worktree loop marker, no veto → reinforces mechanical. 
- (6, 'claude:77777777-aaaa-bbbb-cccc-dddddddddddd', 'user', 'Run the work-on-issue loop for issue 42 in its worktree.'), + (6, 'claude:77777777-aaaa-bbbb-cccc-dddddddddddd', 1, 'user', 'Run the work-on-issue loop for issue 42 in its worktree.'), -- issue-feed (mechanical): worktree/work-on-issue loop markers, no veto. - (7, 'claude:91111111-1111-1111-1111-111111111111', 'user', 'Drive the work-on-issue loop in the worktree.'), - (8, 'claude:92222222-2222-2222-2222-222222222222', 'user', 'Continue the worktree implementation.'), + (7, 'claude:91111111-1111-1111-1111-111111111111', 1, 'user', 'Drive the work-on-issue loop in the worktree.'), + (8, 'claude:92222222-2222-2222-2222-222222222222', 1, 'user', 'Continue the worktree implementation.'), -- landing-copy (exploration): repeated vetoes / doesn't-want-to-proceed steering. - (9, 'claude:a3333333-3333-3333-3333-333333333333', 'user', '[Request interrupted by user]'), - (10, 'claude:a3333333-3333-3333-3333-333333333333', 'user', 'doesn''t want to proceed — try a warmer tone'), - (11, 'claude:a4444444-4444-4444-4444-444444444444', 'user', '[Request interrupted by user] rethink the framing'), + (9, 'claude:a3333333-3333-3333-3333-333333333333', 1, 'user', '[Request interrupted by user]'), + (10, 'claude:a3333333-3333-3333-3333-333333333333', 2, 'user', 'doesn''t want to proceed — try a warmer tone'), + (11, 'claude:a4444444-4444-4444-4444-444444444444', 1, 'user', '[Request interrupted by user] rethink the framing'), -- mixed-bag (neither dominant): a single veto, balancing its one passed decision + one .md edit. - (12, 'claude:b5555555-5555-5555-5555-555555555555', 'user', '[Request interrupted by user]'); + (12, 'claude:b5555555-5555-5555-5555-555555555555', 1, 'user', '[Request interrupted by user]'); + +-- ============================================================================ +-- DISPATCH-FACT (#za). 
Orchestration: two in-repo Claude parents dispatch subagents (P1→2, +-- P2→1 ⇒ distinct-parents=2, subagents=3), and the OUT-of-repo session E dispatches one +-- subagent that must NOT count (its parent is outside the repo prefix). A subagent row carries +-- relationship_type='subagent' + parent_session_id; its file_path is under `%/subagents/%` +-- (production shape) so the body's parent scope excludes it from every Claude count while +-- dispatch-fact counts it via the parent join. The expected dispatch-fact result is 2|3, +-- derived from these seeded rows. Non-vacuous: re-pointing the out-of-repo subagent's parent +-- to an in-repo parent flips the counts (proving the parent-scope filter is load-bearing). +-- ============================================================================ + +-- Two in-repo orchestrating parents (blank git_branch → not mode-classification tracks). +INSERT INTO sessions VALUES + ('claude:d1111111-0000-0000-0000-000000000001', 'proj', 'claude', + '/repo/proj', '', + '/u/.claude/projects/-repo-proj/d1111111.jsonl', + '2026-06-08', '2026-06-08', 'Orchestrate the parser sweep across subagents.', 5, 2, + NULL, ''), + ('claude:d2222222-0000-0000-0000-000000000002', 'proj', 'claude', + '/repo/proj/internal', '', + '/u/.claude/projects/-repo-proj-internal/d2222222.jsonl', + '2026-06-08', '2026-06-08', 'Dispatch one subagent for the index migration.', 4, 1, + NULL, ''); + +-- Three in-repo subagents (P1 dispatches 2, P2 dispatches 1). Each carries +-- relationship_type='subagent', a parent_session_id, and a `%/subagents/%` file_path so the +-- body's parent-scope filter (file_path NOT LIKE '%/subagents/%') keeps them OUT of the +-- scoping/work-by-area/etc. counts while dispatch-fact counts them through the parent join. 
+INSERT INTO sessions VALUES + ('claude:e1111111-0000-0000-0000-000000000001', 'proj', 'claude', + '/repo/proj', '', + '/u/.claude/projects/-repo-proj/subagents/e1111111.jsonl', + '2026-06-08', '2026-06-08', 'subagent: refactor the tokenizer.', 3, 0, + 'claude:d1111111-0000-0000-0000-000000000001', 'subagent'), + ('claude:e2222222-0000-0000-0000-000000000002', 'proj', 'claude', + '/repo/proj', '', + '/u/.claude/projects/-repo-proj/subagents/e2222222.jsonl', + '2026-06-08', '2026-06-08', 'subagent: refactor the entrypoint.', 3, 0, + 'claude:d1111111-0000-0000-0000-000000000001', 'subagent'), + ('claude:e3333333-0000-0000-0000-000000000003', 'proj', 'claude', + '/repo/proj/internal', '', + '/u/.claude/projects/-repo-proj-internal/subagents/e3333333.jsonl', + '2026-06-08', '2026-06-08', 'subagent: migrate the index.', 3, 0, + 'claude:d2222222-0000-0000-0000-000000000002', 'subagent'); + +-- A subagent of the OUT-of-repo session E (id=70 below repoints it in the non-vacuous test). +-- Its PARENT (claude:eeeeeeee…) is outside the repo prefix, so dispatch-fact must NOT count it. +INSERT INTO sessions VALUES + ('claude:e4444444-0000-0000-0000-000000000004', 'otherproj', 'claude', + '/elsewhere/otherproj', '', + '/u/.claude/projects/-elsewhere-otherproj/subagents/e4444444.jsonl', + '2026-06-08', '2026-06-08', 'subagent: unrelated repo work.', 3, 0, + 'claude:eeeeeeee-4444-5555-6666-777777777777', 'subagent'); + +-- ============================================================================ +-- DECISION-NO-FOLLOWUP (#9h). A `done` decision (answered AskUserQuestion / approved +-- ExitPlanMode) counts when NO Edit/Write in the SAME session sits at a strictly HIGHER +-- message ordinal. "Later" is the REAL chronological order via tool_calls.message_id → +-- messages.id → messages.ordinal, NOT tool_calls.id insertion order. 
Two sessions (blank +-- git_branch → not mode-classification tracks): +-- NF1 — a done AskUserQuestion at ordinal 2, with an Edit at ordinal 1 (BEFORE it) → +-- COUNTS (no later edit). The earlier-ordinal edit proves the join is chronological: +-- a naive "any edit in the session" test would wrongly disqualify it. +-- NF2 — a done ExitPlanMode at ordinal 2, with a Write at ordinal 3 (AFTER it) → +-- does NOT count (it has a follow-up). +-- Expected decision-no-followup = 1, derived from these seeded rows (the existing decision +-- tool_calls carry NULL message_id, so they do not join — the oracle is exactly NF1/NF2). +-- Non-vacuous (test): insert an Edit at ordinal 3 in NF1 → the count drops 1→0, proving the +-- ordinal compare (not insertion order) is load-bearing. +-- ============================================================================ +INSERT INTO sessions VALUES + ('claude:f0111111-0000-0000-0000-000000000001', 'proj', 'claude', + '/repo/proj', '', + '/u/.claude/projects/-repo-proj/f0111111.jsonl', + '2026-06-09', '2026-06-09', 'Decide the cache strategy, then move on.', 4, 2, + NULL, ''), + ('claude:f0222222-0000-0000-0000-000000000002', 'proj', 'claude', + '/repo/proj', '', + '/u/.claude/projects/-repo-proj/f0222222.jsonl', + '2026-06-09', '2026-06-09', 'Approve the plan, then implement it.', 4, 2, + NULL, ''); + +-- NF messages — carry the ordinals the chronological join reads. +INSERT INTO messages (id, session_id, ordinal, role, content) VALUES + -- NF1: edit-message ordinal 1, decision-message ordinal 2 (edit is BEFORE the decision). + (101, 'claude:f0111111-0000-0000-0000-000000000001', 1, 'assistant', 'made an early edit'), + (102, 'claude:f0111111-0000-0000-0000-000000000001', 2, 'assistant', 'asked the cache question'), + -- NF2: decision-message ordinal 2, follow-up-edit-message ordinal 3 (edit is AFTER it). 
+ (103, 'claude:f0222222-0000-0000-0000-000000000002', 1, 'user', 'approve the plan'), + (104, 'claude:f0222222-0000-0000-0000-000000000002', 2, 'assistant', 'approved plan'), + (105, 'claude:f0222222-0000-0000-0000-000000000002', 3, 'assistant', 'wrote the implementation'); + +-- NF tool_calls — message_id links each call to its message (so the join can read its ordinal). +INSERT INTO tool_calls (id, session_id, message_id, tool_name, input_json, result_content) VALUES + -- NF1: an Edit at ordinal 1 (message 101), then the done decision at ordinal 2 (message 102). + (80, 'claude:f0111111-0000-0000-0000-000000000001', '101', 'Edit', + '{"file_path":"/repo/proj/internal/cache/warm.go"}', NULL), + (81, 'claude:f0111111-0000-0000-0000-000000000001', '102', 'AskUserQuestion', + '{"questions":[{"header":"Cache strategy","question":"LRU vs LFU?"}]}', + 'Your questions have been answered: "LRU vs LFU?"="LRU"'), + -- NF2: the done decision at ordinal 2 (message 104), then a Write at ordinal 3 (message 105). + (82, 'claude:f0222222-0000-0000-0000-000000000002', '104', 'ExitPlanMode', + '{"plan":"Implement the cache then test it."}', + 'User has approved your plan. You can now start coding.'), + (83, 'claude:f0222222-0000-0000-0000-000000000002', '105', 'Write', + '{"file_path":"/repo/proj/internal/cache/impl.go"}', NULL); + +-- ============================================================================ +-- KNOWLEDGE-WORK track `notes-ops` (#zb #2 / AC-5). A notes/ops shop, not a code repo: the +-- intake→process→file→log→close loop markers + content/ops edits (`.md` + `.json` data) + +-- a gate-pass batch confirm + ZERO veto + NO issue→worktree→PR loop. mode-classification +-- emits `knowledge-work` for it. Non-vacuous (test): stripping the kloop markers drops the +-- track to `unlabeled` (the knowledge-work score gates on the marker's presence). 
+-- ============================================================================ +INSERT INTO sessions VALUES + ('claude:c6111111-0000-0000-0000-000000000001', 'proj', 'claude', + '/repo/proj', 'notes-ops', + '/u/.claude/projects/-repo-proj/c6111111.jsonl', + '2026-06-10', '2026-06-10', 'Intake the new memos and file them.', 5, 2, + NULL, ''), + ('claude:c6222222-0000-0000-0000-000000000002', 'proj', 'claude', + '/repo/proj', 'notes-ops', + '/u/.claude/projects/-repo-proj/c6222222.jsonl', + '2026-06-10', '2026-06-10', 'Process the batch and log the run.', 4, 2, + NULL, ''); +INSERT INTO tool_calls (id, session_id, tool_name, input_json, result_content) VALUES + -- gate-pass batch confirm (knowledge-work gates: "confirm this batch / approve this write"). + (90, 'claude:c6111111-0000-0000-0000-000000000001', 'AskUserQuestion', + '{"questions":[{"header":"Batch scope","question":"File this batch of memos now?"}]}', + 'Your questions have been answered: "File this batch of memos now?"="yes"'), + -- content/ops edits: `.md` notes + `.json` ledger data (prose + data dominate code). + (91, 'claude:c6111111-0000-0000-0000-000000000001', 'Write', + '{"file_path":"/repo/proj/notes/2026-06-10.md"}', NULL), + (92, 'claude:c6222222-0000-0000-0000-000000000002', 'Write', + '{"file_path":"/repo/proj/notes/index.md"}', NULL), + (93, 'claude:c6222222-0000-0000-0000-000000000002', 'Edit', + '{"file_path":"/repo/proj/ledger/runs.json"}', NULL); +INSERT INTO messages (id, session_id, ordinal, role, content) VALUES + -- knowledge-work loop markers: intake → process → file → log → close. 
+ (110, 'claude:c6111111-0000-0000-0000-000000000001', 1, 'user', 'intake the new memos for today'), + (111, 'claude:c6111111-0000-0000-0000-000000000001', 2, 'user', 'then file the ones that are ready'), + (112, 'claude:c6222222-0000-0000-0000-000000000002', 1, 'user', 'process the batch we queued'), + (113, 'claude:c6222222-0000-0000-0000-000000000002', 2, 'user', 'log the run and close out the day'); diff --git a/skills/survey/references/queries.sql b/skills/survey/references/queries.sql index 8c10bcfc..3cebb000 100644 --- a/skills/survey/references/queries.sql +++ b/skills/survey/references/queries.sql @@ -303,28 +303,38 @@ LIMIT 20; -- name: mode-classification -- #324 (G) — classify each TRACK into a work MODE, so the report can make the RIGHT --- commission offer per track (automation for mechanical, book-keeping for exploration) --- instead of one undifferentiated pitch. The track key is the session's `git_branch`: --- worktree-based projects (a `work-on-issue.sh` that branches per issue) carry one branch --- per track, and content/design exploration runs on its own branch(es) too — so branch is --- the per-track key the survey already has. Sessions with a blank branch are not a track --- and drop out (they fold into the generic report, never a guessed mode). +-- commission offer per track (automation for manual, book-keeping for exploration, +-- batch book-keeping for knowledge-work) instead of one undifferentiated pitch. The track +-- key is the session's `git_branch`: worktree-based projects (a `work-on-issue.sh` that +-- branches per issue) carry one branch per track, and content/design exploration runs on its +-- own branch(es) too — so branch is the per-track key the survey already has. Sessions with a +-- blank branch are not a track and drop out (they fold into the generic report, never a +-- guessed mode). 
-- -- Per track, tally the signatures the survey already reads (all repo-scoped, subagent-free): -- veto — `[Request interrupted` / `doesn't want to proceed` markers in the messages --- loop — `worktree` / `work-on-issue` markers (the mechanical issue→worktree→PR loop) +-- loop — `worktree` / `work-on-issue` markers (the manual issue→worktree→PR loop) +-- kloop — `intake` / `process the` / `file the` / `log the` / `close out` markers (the knowledge-work +-- intake→process→file→log→close loop; phrase-level patterns, not bare words) -- passed — answered/approved AskUserQuestion/ExitPlanMode decisions (gate-pass) -- rejected — the user-doesn't-want-to-proceed decisions (gate-fail / cancelled path) -- code — Edit/Write to a code file (`.go`/`.ts`/`.py`/`.rs`/`.js`/`.tsx`/`.go`…) -- prose — Edit/Write to a `.md` content/doc file --- Score the two signatures and label by the DOMINANT one with a MARGIN guard: --- mechanical signature = loop present + gate-pass-dominant + zero veto + code-heavy +-- data — Edit/Write to a content/ops data file (`.json`/`.csv`/`.yaml`/`.yml`/`.db`) +-- Score the three signatures and label by the DOMINANT one with a MARGIN guard: +-- manual signature = loop present + gate-pass-dominant + zero veto + code-heavy -- exploration signature = veto present + a rejected/cancelled path + prose-heavy --- A label is assigned ONLY when one score beats the other by >= 2 (a clear dominance); a --- track with neither clearly dominant stays `unlabeled` and the report gives it the generic +-- knowledge-work signature = kloop present + gate-pass-dominant + no issue→PR loop + +-- content/ops edits (prose+data) dominate code (intake→process→ +-- file→log→close: a notes/ops shop, not a code repo; veto is not scored directly, it only raises the competing exploration score) +-- A label is assigned ONLY when one score beats the others by >= 2 (a clear dominance); a +-- track with none clearly dominant stays `unlabeled` and the report gives it the generic -- book-keeping offer — NEVER a guessed automation pitch (the asymmetry favors not -- mis-offering: a missed automation offer is a cheap omission; a 
wrong automation pitch at --- creative work is the misread to avoid). The report reads `mode` per track to pick the offer. +-- creative work is the misread to avoid). The `manual` label is the repetitive-but-substantive +-- track (effortful, not trivial); `mechanical` is reserved for genuinely trivial edits, which +-- the classifier does not separately detect today, so every drive-loop track reads `manual`. +-- The report reads `mode` per track to pick the offer. WITH track_sessions AS ( SELECT s.id, s.git_branch AS track FROM sessions s @@ -346,6 +356,14 @@ loops AS ( WHERE m.content LIKE '%worktree%' OR m.content LIKE '%work-on-issue%' GROUP BY ts.track ), +kloops AS ( + SELECT ts.track, COUNT(*) AS n + FROM track_sessions ts JOIN messages m ON m.session_id = ts.id + WHERE m.content LIKE '%intake%' OR m.content LIKE '%process the%' + OR m.content LIKE '%file the%' OR m.content LIKE '%log the%' + OR m.content LIKE '%close out%' + GROUP BY ts.track +), passed AS ( SELECT ts.track, COUNT(*) AS n FROM track_sessions ts JOIN tool_calls t ON t.session_id = ts.id @@ -367,6 +385,8 @@ rejected AS ( edits AS ( SELECT ts.track, SUM(CASE WHEN fp LIKE '%.md' THEN 1 ELSE 0 END) AS prose, + SUM(CASE WHEN fp LIKE '%.json' OR fp LIKE '%.csv' OR fp LIKE '%.yaml' + OR fp LIKE '%.yml' OR fp LIKE '%.db' THEN 1 ELSE 0 END) AS data, SUM(CASE WHEN fp LIKE '%.go' OR fp LIKE '%.ts' OR fp LIKE '%.tsx' OR fp LIKE '%.py' OR fp LIKE '%.rs' OR fp LIKE '%.js' THEN 1 ELSE 0 END) AS code FROM track_sessions ts @@ -378,27 +398,97 @@ edits AS ( ), sig AS ( SELECT t.track, - COALESCE(v.n, 0) AS veto, COALESCE(l.n, 0) AS loop, + COALESCE(v.n, 0) AS veto, COALESCE(l.n, 0) AS loop, COALESCE(k.n, 0) AS kloop, COALESCE(p.n, 0) AS passed, COALESCE(r.n, 0) AS rejected, - COALESCE(e.code, 0) AS code, COALESCE(e.prose, 0) AS prose + COALESCE(e.code, 0) AS code, COALESCE(e.prose, 0) AS prose, COALESCE(e.data, 0) AS data FROM (SELECT DISTINCT track FROM track_sessions) t LEFT JOIN vetoes v ON v.track = t.track 
LEFT JOIN loops l ON l.track = t.track + LEFT JOIN kloops k ON k.track = t.track LEFT JOIN passed p ON p.track = t.track LEFT JOIN rejected r ON r.track = t.track LEFT JOIN edits e ON e.track = t.track ), scored AS ( SELECT *, - ((loop > 0) + (passed > rejected) + (veto = 0) + (code > prose)) AS mech, - ((veto > 0) + (rejected > 0) + (prose > code)) AS expl + ((loop > 0) + (passed > rejected) + (veto = 0) + (code > prose + data)) AS mech, + ((veto > 0) + (rejected > 0) + (prose > code)) AS expl, + -- knowledge-work scores ONLY when its defining intake→process→file→log→close marker is + -- present (kloop > 0); without it the score is 0, so a veto-free prose track without the + -- knowledge loop reads exploration/unlabeled, not knowledge-work. With the marker present, + -- it scores on the loop marker + the gate-pass batch confirms + the content/ops edit profile. + (CASE WHEN kloop > 0 + THEN (1 + (passed > rejected) + (loop = 0) + (prose + data > code)) + ELSE 0 END) AS know FROM sig ) SELECT track, CASE - WHEN mech - expl >= 2 THEN 'mechanical' - WHEN expl - mech >= 2 THEN 'exploration' + WHEN know - mech >= 2 AND know - expl >= 2 THEN 'knowledge-work' + WHEN mech - expl >= 2 AND mech - know >= 2 THEN 'manual' + WHEN expl - mech >= 2 AND expl - know >= 2 THEN 'exploration' ELSE 'unlabeled' END AS mode FROM scored ORDER BY track; + +-- name: dispatch-fact +-- za — the FACT of subagent dispatch, so an orchestrated repo isn't read as idle. The +-- body EXCLUDES subagent sessions everywhere (`file_path NOT LIKE '%/subagents/%'`), so a +-- repo whose real work lands in dispatched subagents shows an almost-empty body. This +-- surfaces only the FACT (a count), never subagent CONTENT. Marker: a subagent row is +-- `relationship_type = 'subagent'` and links to its parent via `parent_session_id` +-- (agentsview v0.32.1; the Task-tool `subagent_session_id` is unpopulated). 
Scope is the +-- body's exact parent scope — count a subagent ONLY when its PARENT is an in-repo, +-- non-subagent Claude session — so a subagent of an out-of-repo parent stays out. +-- `sessions_that_orchestrated` is the DISTINCT in-repo parent count (the orchestration +-- fact); `subagents_dispatched` is the total. The report renders one BY THE NUMBERS line +-- from these and drops it when sessions_that_orchestrated = 0. +SELECT + COUNT(DISTINCT p.id) AS sessions_that_orchestrated, + COUNT(*) AS subagents_dispatched +FROM sessions sub +JOIN sessions p ON p.id = sub.parent_session_id +WHERE sub.relationship_type = 'subagent' + AND p.agent = 'claude' + AND p.file_path NOT LIKE '%/subagents/%' + AND (p.cwd = :repo_root OR p.cwd LIKE :repo_root || '/%'); + +-- name: decision-no-followup +-- 9h — count the `done` decisions (answered AskUserQuestion / approved ExitPlanMode) in +-- repo-scoped Claude sessions that have NO Edit/Write LATER in the same session. This is the +-- "decisions you made with no follow-up action" BY THE NUMBERS figure — a decision settled +-- and then nothing built on it. It is DISTINCT from BACKLOG (decided-not-shipped): BACKLOG +-- is a transcript fork with no repo artifact, this is a same-session chronological gap. +-- "Later" is the REAL chronological order, not insertion order: each tool call links to its +-- message via `tool_calls.message_id`, and `messages.ordinal` (NOT NULL in agentsview) is the +-- message's chronological position — so "an Edit after this decision" is an ordinal compare, +-- never a `tool_calls.id` insertion-order compare (which would be a false oracle when calls +-- land out of insertion order). A `done` decision counts when no Edit/Write in the SAME +-- session sits at a strictly higher message ordinal. 
+WITH decisions AS ( + SELECT t.session_id AS sid, m.ordinal AS dord + FROM tool_calls t + JOIN messages m ON m.session_id = t.session_id AND CAST(m.id AS TEXT) = t.message_id + JOIN sessions s ON s.id = t.session_id + WHERE s.agent = 'claude' + AND s.file_path NOT LIKE '%/subagents/%' + AND (s.cwd = :repo_root OR s.cwd LIKE :repo_root || '/%') + AND t.tool_name IN ('AskUserQuestion', 'ExitPlanMode') + AND (t.result_content LIKE 'User has answered%' + OR t.result_content LIKE 'Your questions have been answered%' + OR t.result_content LIKE 'Your question has been answered%' + OR t.result_content LIKE 'User has approved your plan%' + OR t.result_content LIKE 'User approved%') +), +followups AS ( + SELECT t.session_id AS sid, m.ordinal AS eord + FROM tool_calls t + JOIN messages m ON m.session_id = t.session_id AND CAST(m.id AS TEXT) = t.message_id + WHERE t.tool_name IN ('Edit', 'Write') +) +SELECT COUNT(*) AS no_followup +FROM decisions d +WHERE NOT EXISTS ( + SELECT 1 FROM followups f WHERE f.sid = d.sid AND f.eord > d.dord +); From fd84312f853089002b142118a6b5b51bd3e6f87c Mon Sep 17 00:00:00 2001 From: CL Kao Date: Sat, 13 Jun 2026 00:33:52 -0700 Subject: [PATCH 2/5] survey: rewrite step-4 report to the value-&-numbers-first spine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Render-level redesign (AC-1/AC-2/AC-4/AC-6), folding all six feedback seeds into the captain-locked output structure: - step 4 report template rewritten: SpaceDock-survey title carrying the window (5x), recent-window-snapshot + agent-logs-only subhead, WHAT THIS GIVES YOU plain lede (9h), BY THE NUMBERS block (interruptions, hanging-threads count, no-follow-up count, sessions-read with the workdir-vs-name caveat, conditional za dispatch line), HOW YOU WORK de-jargoned loop + the knowledge-work archetype (zb#2), the ↓ full-analysis pointer, then the demoted detail: THREADS TO PULL reframe of the frontier (9h#4), BACKLOG, RECENT DECISIONS, 
WORKSTREAMS/WORK-BY-AREA (with the branch-and-merge caveat, zw#2)/CODEX (with the conditional all-unlabeled collapse, h5#1)/SCAFFOLD/INTERRUPTIONS, and one consolidated WHAT THIS CAN'T SEE block (5x). - explicit no-scratch-preamble instruction (h5#2): the first line is the title, never "I have everything I need" / "Let me ...". - mode-aware commission offer (zw#1): manual keeps gate-and-drive; exploration leads with iterate/steer; knowledge-work gets batch book-keeping. - step 2 run_query list + prose updated (dispatch-fact, decision-no-followup, knowledge-work archetype, manual rename); Overview + synthesis guidance updated to match. Proven by a LIVE survey drive over this repo's real agentsview corpus (68 sessions, 33 orchestrating parents, decision-no-followup=2, dispatch=33/1720) rendering the new spine with corpus-derived numbers and no scratch preamble — not a SKILL.md prose-grep. Co-Authored-By: Claude Opus 4.8 (1M context) --- skills/survey/SKILL.md | 130 +++++++++++++++++++++++++++-------------- 1 file changed, 86 insertions(+), 44 deletions(-) diff --git a/skills/survey/SKILL.md b/skills/survey/SKILL.md index c08f2c6a..3a993d2b 100644 --- a/skills/survey/SKILL.md +++ b/skills/survey/SKILL.md @@ -10,7 +10,7 @@ user-invocable: true Survey is the first thing you run on unfamiliar ground: it reconstructs what the AI agents in this project have implicitly been doing, from their session history. It reports the inferred workflow, the workstreams, the recent decisions, and — load-bearing — the OPEN decisions (the abandoned or unanswered forks) plus how often the human had to step in. Then it offers to commission a real spacedock workflow with explicit gates from what it found. -It reads **agentsview**'s session DB and is strictly read-only — the recommended queries live in `references/queries.sql` (one labeled query per concern) so nothing is a black box. 
The decision and interruption signals below are **Claude Code**'s; **Codex** is surfaced too, as its own body section (a workdir-attributed count + workstream clusters + activity), since Codex sessions land with no recorded cwd and need the `exec_command.$.workdir` signal to be scoped to this repo. Gemini and per-file Codex work-by-area remain deferred follow-ups. The closing move is the discovery → commission bridge: the OPEN decisions become candidate gates, the workstreams become candidate entities, the inferred loop becomes the stage list — and the offer is keyed to each track's MODE (automation for mechanical tracks, book-keeping for exploration tracks). +It reads **agentsview**'s session DB and is strictly read-only — the recommended queries live in `references/queries.sql` (one labeled query per concern) so nothing is a black box. The decision and interruption signals below are **Claude Code**'s; **Codex** is surfaced too, as its own body section (a workdir-attributed count + workstream clusters + activity), since Codex sessions land with no recorded cwd and need the `exec_command.$.workdir` signal to be scoped to this repo. Gemini and per-file Codex work-by-area remain deferred follow-ups. The report opens with plain value and concrete numbers from the user's own data, then demotes the mode/track vocabulary to a detail section below. The closing move is the discovery → commission bridge: the OPEN decisions become candidate gates, the workstreams become candidate entities, the inferred loop becomes the stage list — and the offer leads with plain value ("this helps you run the repetitive work and stop only at the calls you'd want to make"), keyed to each track's MODE (gate-and-drive for the manual-but-repetitive tracks, book-keeping for exploration and knowledge-work tracks). Run the four steps in order: **check agentsview → scan → recognize scaffold → report and offer**. 
@@ -104,7 +104,9 @@ run_query codex-activity # #323 — exec_command/update_plan/spawn_agent tal run_query scaffold-usage # #319 — behavioral skill_name family tally (spacedock self EXCLUDED) run_query work-by-area # #317.2 — Edit/Write file_path → LOGICAL area (worktree prefix stripped) + kind run_query decision-open # #320 — AskUserQuestion/ExitPlanMode frontier; OPEN sorts first -run_query mode-classification # #324 — classify each git_branch track mechanical/exploration/unlabeled +run_query decision-no-followup # 9h — done decisions with no later Edit/Write (message_id → ordinal join) +run_query mode-classification # #324 — classify each git_branch track manual/exploration/knowledge-work/unlabeled +run_query dispatch-fact # za — orchestration FACT: distinct in-repo parents that dispatched subagents + total dispatched ``` `scoping` returns `sessions=0` → there is no Claude agent history for this repo; say so and stop. Nothing to discover. (Survey reads Claude history only for now; a repo whose only agent history is Codex/Gemini will report "no agent history" here — surfacing those agents is a deferred follow-up.) Note the `blank_cwd` count in the report if non-zero (sessions agentsview never captured a cwd for, which the repo-root scope cannot place). @@ -113,10 +115,14 @@ run_query mode-classification # #324 — classify each git_branch track mechanic **Codex body signals (`codex-scoped`, `codex-workstreams`, `codex-activity`).** agentsview persists no Codex session cwd, but a Codex session's `exec_command` tool calls carry `$.workdir` (the absolute working directory of each shell command). `codex-scoped` attributes a Codex session to THIS repo when it has an `exec_command` whose `$.workdir` is under the repo-root prefix — the workdir analogue of the Claude cwd-prefix scope, so it admits this repo's Codex and EXCLUDES a same-basename sibling (whose workdirs fall under a different prefix). 
Over that sibling-free set, `codex-workstreams` clusters the sessions into ensign-task workstreams from each `first_message` (the runnable 3-case rule: dispatch-file read → `{TASK}` with the stage suffix stripped; `Spacedock task/entity` backtick → the backtick-quoted `{TASK}`; else `(unlabeled)`), and `codex-activity` tallies the per-tool activity (`exec_command`/`update_plan`/`spawn_agent`). These are the Codex body section (step 4) — the workstreams surface real Codex tracks the Claude-only body misses. All from the agentsview DB; no raw-rollout parsing. (Per-file Codex work-by-area and a source-health signal are deferred — they need an upstream agentsview ingestion change.) -**Track modes (`mode-classification`).** `mode-classification` labels each `git_branch` track `mechanical` (low veto + gate-pass-dominant + issue→worktree→PR loop markers + code edits), `exploration` (high veto + a rejected/cancelled path + prose/`.md` edits), or `unlabeled` (neither signature clearly dominant). The report reads the per-track `mode` to label WORKSTREAMS and to pick the right commission offer per track (step 4) — automation for mechanical, book-keeping for exploration, generic book-keeping for unlabeled (never a guessed automation pitch). +**Track modes (`mode-classification`).** `mode-classification` labels each `git_branch` track `manual` (low veto + gate-pass-dominant + issue→worktree→PR loop markers + code edits — the repetitive-but-substantive drive loop; the label is `manual`, not `mechanical`, because the work is effortful, not trivial — reserve "mechanical" for genuinely trivial edits, which the classifier does not separately detect today), `exploration` (high veto + a rejected/cancelled path + prose/`.md` edits), `knowledge-work` (an intake→process→file→log→close loop + content/ops edits — `.md` and data — dominating code, with no issue→PR loop and no veto-heavy creative signature: a notes/ops shop, not a code repo), or `unlabeled` (none clearly dominant). 
The report reads the per-track `mode` to label WORKSTREAMS and to pick the right commission offer per track (step 4) — gate-and-drive for manual, thread book-keeping for exploration, batch book-keeping for knowledge-work, generic book-keeping for unlabeled (never a guessed automation pitch). **Honest signal accounting.** The `decision-open` rows are the human-decision points; `OPEN` = still needs the human, and you lead the report with those. For the interruption total, count the AskUserQuestion / ExitPlanMode decisions plus the hard-veto markers Claude sessions retain (`[Request interrupted` / `Request interrupted by user` / `doesn't want to proceed` in the message stream), over the same repo-scoped session set; `pct = total*100/user_turns`. Never dress an empty section up as "no decisions" — if a section is empty, say the run found none of that signal. +**No-follow-up decisions (`decision-no-followup`).** This counts the `done` decisions (answered AskUserQuestion / approved ExitPlanMode) that had NO Edit/Write LATER in the same session — a call you settled and then built nothing on. "Later" is the real chronological order (`tool_calls.message_id → messages.ordinal`), not insertion order. It is DISTINCT from BACKLOG (decided-not-shipped, a transcript fork with no repo artifact): this is a same-session gap. It renders as one `BY THE NUMBERS` line. + +**Dispatch fact (`dispatch-fact`).** The body EXCLUDES dispatched-subagent sessions, so an orchestrated repo (most work inside subagents) reads as nearly idle. `dispatch-fact` counts the DISTINCT in-repo parent sessions that dispatched subagents and the total dispatched, joining each subagent (`relationship_type='subagent'`) to its parent and keeping only in-repo parents. It surfaces the FACT of orchestration, never subagent CONTENT — the line is dropped when no in-repo session orchestrated. + ## 3. 
Recognize an incumbent scaffold Recognize the scaffold from TWO signals and reconcile them — a file probe (what is installed on disk) and the behavioral tally (what actually ran), which is the `scaffold-usage` query you already ran in step 2. A file-only probe misses a scaffold that was invoked but isn't checked in; a tally-only read misses one installed but never used. Join them. @@ -143,76 +149,111 @@ Every `{slot}` below is a FILL slot: substitute the real value from the step-2 s **Cross-check the OPEN frontier against the repo (before you present it).** The `decision-open` query is a TRANSCRIPT-only scan — a fork that read OPEN there may already be shipped (a merged PR / a commit) and over-reports. For each transcript-OPEN fork, cross-reference the repo (`git log`, merged PRs via `gh pr list --state merged` if available, the working tree) and split it: - **shipped** → **DROP** from the frontier. Evidence: a merged PR or a git-log commit whose subject/body CONFIDENTLY references the fork (its decision header or branch — an exact-ish token match). -- **decided-not-shipped** → move to a **backlog** line (decided, no artifact yet). -- **never-decided** → **true open**, stays on the `NEEDS YOU` frontier. +- **decided-not-shipped** → move to a **BACKLOG** line (decided, no artifact yet). +- **never-decided** → **true open**, stays on the `THREADS TO PULL` frontier. **Conservative-match rule.** DROP only on a CONFIDENT repo match; anything less than confident → KEEP on the frontier. A false "still open" is a cheap nudge; a false "shipped" silently hides a real open fork — so the asymmetry favors keeping. **Mandatory degrade.** When NO repo signal is available (not a git repo, or `git log` / PR lookup fails or is empty), the frontier degrades to transcript-only and EVERY OPEN fork is flagged **`unverified`** in the report — never silently presented as authoritative. The degrade is the default behavior, not an error. 
-Lead with the one-line headline, then render the body DIRECTLY in the same turn — do NOT stop and ask first. The survey is read-only orientation: the body IS the value, and a pre-body confirm/menu is a round-trip with no decision behind it (and risks ending with no survey at all). The ONLY stop in this flow is the end-of-report commission OFFER (the real decision). So emit the headline and flow straight into the synthesis fence: +Render the report DIRECTLY in the same turn — do NOT stop and ask first, and do NOT precede it with any scratch-reasoning preamble. The first line the user sees MUST be the `SpaceDock survey —` title; never emit `I have everything I need`, `Let me cross-check …`, `Let me …`, or any "here is my plan" narration before the report. (The cross-check FINDINGS still appear — as report content in `THREADS TO PULL` / `BACKLOG` — but the scratch framing that produced them does not.) The survey is read-only orientation: the body IS the value, and a pre-body confirm/menu is a round-trip with no decision behind it. The ONLY stop in this flow is the end-of-report commission OFFER (the real decision). -> Found **{N} sessions** in `{project}` (`{date range}`), with **{D} decision points** and **{V} interruptions** — here's the lay of the land: +The report is **value & numbers first**: a plain "what this gives you" lede + concrete figures from the user's own data lead; the mode/track vocabulary demotes to a labeled detail section below the fold. Every figure is a FILL slot from the step-2 scan (a literal `{slot}` shown is a bug); every figure derives from the surveyed session rows, never templated prose. 
Emit the report: ``` -PROJECT: {basename} {sessions} Claude sessions · {date range} - {if blank_cwd>0: {blank_cwd} uncaptured-cwd sessions} - -CODEX (only if codex-scoped>0; workdir-attributed, distinct from the name-only presence flag) - {codex_scoped_sessions} Codex sessions attributed to this repo by exec_command working dir - {if codex-presence>codex-scoped: (codex-presence matches {codex_sessions} by project NAME only — may include a same-named sibling; the workdir-attributed count above is sibling-free)} - workstreams: {the codex-workstreams clusters — workstream → session count; (unlabeled) last} - activity: {the codex-activity tally — exec_command {n}, update_plan {n}, spawn_agent {n}} - -SCAFFOLD - {state-the-fact per family: family + invocation count + on-disk presence — e.g. "superpowers: 186 invocations (not checked in). Other one-offs: …"; or "none"} +SpaceDock survey — your last {N} days ← {N} from scoping.span +(recent-window snapshot · agent logs only{if blank_cwd>0: · {blank_cwd} sessions had no working dir, not placed}) + +WHAT THIS GIVES YOU + {plain language, no jargon: "You steer your agents by hand ~{interruptions} times over this + window. About {the manual/repeated share} are the same few moves. 
A SpaceDock workflow can + run the repetitive parts for you and stop only where you'd want a say."} + +BY THE NUMBERS + {interruptions} hand-steering interruptions ← decisions + veto markers (the {V} total) + {hanging} hanging threads (started, never closed) ← count of THREADS TO PULL (post-cross-check OPEN) + {no_followup} decisions you made with no follow-up action ← decision-no-followup (distinct from BACKLOG) + {sessions} sessions read{if codex-scoped>0: (Claude {claude} · Codex {codex_scoped} by working dir{if codex-presence>codex-scoped: ; name-match would say {codex_presence} — sibling repos, ignored})} + {if sessions_that_orchestrated>0: {sessions_that_orchestrated} sessions dispatched subagents ({subagents_dispatched} dispatched — their work isn't shown here)} + +HOW YOU WORK + {the inferred loop as an arrow chain} — {one honest line naming the dominant mode in PLAIN terms: + "Mostly manual, repetitive tracks (not trivial — they take real work)." for manual; + "Mostly exploratory — you steer an iterating agent." for exploration; + "A knowledge-work loop: intake → process → file → log → close." 
for knowledge-work} + + ↓ full analysis: modes, work-by-area, what this can't see + +═══ everything below is the demoted detail section ═══ + +THREADS TO PULL (where you are now + what's still open — only if any fork is OPEN after the repo cross-check) + {lead with the steady-state — where you are now and the few unresolved threads — and prompt + the next move ("have you thought about …?"), NOT a narration of past decision history.} + {if any OPEN exploration/knowledge-work track: held threads (tracked, prioritized — work you're holding, not bottlenecks):} + ◐ {the open EXPLORATION / knowledge-work forks — deliberately-held threads} + {if any OPEN manual track: manual backlog (gate-and-drive candidates):} + ⚠ {the open MANUAL forks — never-decided questions}{if degraded: each flagged unverified (no repo signal)} + +BACKLOG (decided-not-shipped — only if any fork was decided with no artifact yet) + {decided forks with no shipped artifact yet} ← NOT the no-follow-up figure (that's its own query) -INFERRED WORKFLOW - {the implicit loop across the decisions + prompts, as an arrow chain} — {one honest line} +RECENT DECISIONS (answered or shipped) + {the rest: header — short question} WORKSTREAMS mode - {cluster the decisions + prompts into tracks; one line each, status glyph + the mode-classification label (mechanical / exploration / unlabeled) per track} + {cluster the decisions + prompts into tracks; one line each, status glyph + the mode-classification label (manual / exploration / knowledge-work / unlabeled) per track} WORK BY AREA (logical areas; worktree edits attributed to their area — F) {the product work-by-area buckets (kind=product), by edit count: area — {edits}} {if any kind=config: (+ {sum} edits in .claude/.beads/.git config + sibling refs, footnoted)} + {if the inferred workflow is branch-and-merge (worktree → PR → merge) AND a config bucket out-edits product: caveat — edits counted are the directly-edited branch; product code that lands via merged PRs 
is under-counted here, so this does NOT mean scaffolding > product} -NEEDS YOU (only if any decision is still OPEN after the repo cross-check) - {if any OPEN exploration track: exploration (tracked, prioritized — work you're holding, not bottlenecks):} - ◐ {the open EXPLORATION forks — deliberately-held threads} - {if any OPEN mechanical track: mechanical (automatable backlog — gate-and-drive candidates):} - ⚠ {the open MECHANICAL forks — never-decided questions}{if degraded: each flagged unverified (no repo signal)} - -BACKLOG (only if any fork was decided-not-shipped) - {decided forks with no shipped artifact yet} +CODEX (only if codex-scoped>0; workdir-attributed, distinct from the name-only presence flag) + {codex_scoped_sessions} Codex sessions attributed to this repo by exec_command working dir + {if codex-presence>codex-scoped: (codex-presence matches {codex_sessions} by project NAME only — may include a same-named sibling; the workdir-attributed count above is sibling-free)} + {if the codex-workstreams clusters are ALL (unlabeled): {codex_scoped} Codex sessions, unclassified (ad-hoc shell-driven) — a single honest line, NOT a (unlabeled)-only breakdown} + {else: workstreams: {the codex-workstreams clusters — workstream → session count; (unlabeled) last}} + activity: {the codex-activity tally — exec_command {n}, update_plan {n}, spawn_agent {n}} -RECENT DECISIONS (answered or shipped) - {the rest: header — short question} +SCAFFOLD + {state-the-fact per family: family + invocation count + on-disk presence — e.g. "superpowers: 186 invocations (not checked in). 
Other one-offs: …"; or "none"} INTERRUPTIONS - {if any exploration track: exploration tracks: {n} steers across {m} sessions — this IS the work; book-keeping tracks the threads} - {if any mechanical track: mechanical tracks: {n} steps across {m} sessions — gates + autonomy would carry these between your calls} + {if any exploration/knowledge-work track: exploration/knowledge-work tracks: {n} steers across {m} sessions — this IS the work; book-keeping tracks the threads} + {if any manual track: manual tracks: {n} steps across {m} sessions — gates + autonomy would carry these between your calls} + +WHAT THIS CAN'T SEE (the agent-log corpus is a partial lens) + · work done outside agent sessions (manual edits, other tools, off-log discussion) + · the project's history before this {N}-day window + {if blank_cwd>0: · {blank_cwd} sessions with no recorded working dir} + {if codex-presence>codex-scoped: · {codex-presence − codex-scoped} Codex sessions matched by name only (possible same-named sibling)} + {if sessions_that_orchestrated>0: · work inside dispatched subagents (orchestration counted, content not — see "{subagents_dispatched} dispatched")} ``` +Each `·` / conditional line drops when its slot is empty (the drop-empty-slot rule). The `WHAT THIS CAN'T SEE` block is the SINGLE consolidated home for the lens caveats — do not also scatter `uncaptured-cwd` or Codex-sibling asides elsewhere in the body. + ### The discovery → commission bridge (close every report with this) -After the synthesis, offer spacedock — but key the offer to the MODE of each track (from the `mode-classification` query). Two modes call for two DIFFERENT things; do NOT make one undifferentiated automate-everything pitch. As in the synthesis above, every `{slot}` is a FILL slot: substitute the real step-2 numbers/forks/track-names before you show the user; a literal `{slot}` in your output is a bug. 
+After the report, offer spacedock — leading with plain value, then keyed to the MODE of each track (from the `mode-classification` query). The modes call for DIFFERENT things; do NOT make one undifferentiated automate-everything pitch. As in the report above, every `{slot}` is a FILL slot: substitute the real step-2 numbers/forks/track-names before you show the user; a literal `{slot}` in your output is a bug. -**For the MECHANICAL tracks (mode=mechanical) — offer AUTOMATION.** These are disciplined routine execution (the issue→worktree→PR loop, routine implementation): gate the crucial decisions and let the agent drive the loop between gates. Keep the gate-and-autonomy pitch — it is CORRECT for these. State it tied to the scan (the mechanical tracks' names + their gate-pass count or the interruption count). The per-scaffold flavor sharpens the automation offer: +**For the MANUAL tracks (mode=manual) — offer GATE-AND-DRIVE.** These are disciplined routine execution (the issue→worktree→PR loop, routine implementation): gate the crucial decisions and let the agent drive the loop between gates. Keep the gate-and-autonomy pitch — it is CORRECT for these. State it tied to the scan (the manual tracks' names + their gate-pass count or the interruption count). The per-scaffold flavor sharpens the offer: - **superpowers** maps its disciplines (brainstorming → writing-plans → executing-plans → subagent-driven-development) to stages with the interruption points made EXPLICIT gates. - **gsd / get-shit-done** maps its fixed phases to stages + durable entity state so several work items move concurrently, pausing only at gates. - > For your MECHANICAL tracks (**{the mechanical track names}**): a spacedock workflow that gates the crucial decisions and lets the agent drive the loop between gates — these passed **{the gate-pass count}**, so the agent can carry them and stop only where you marked a gate. 
+ > For your MANUAL tracks (**{the manual track names}**): a spacedock workflow that runs the repetitive loop for you and stops only at the calls you'd want to make — these passed **{the gate-pass count}**, so the agent can carry them between your gates. + +**For the EXPLORATION tracks (mode=exploration) — lead with ITERATE/STEER, then BOOK-KEEPING; never automation.** These are human-driven creative/exploratory work (writing/content, design exploration, steering an agent that drifts): the involvement IS the point. The offer must speak to the iterate/explore loop FIRST — the agent iterates and you steer; an approval gate is ONE shape that loop can take, not the headline — do NOT lead with "explicit approval gates" for these. Then offer spacedock as structure for the parallel threads: track each draft/path and its state (in-flight / paused-by-choice / abandoned) so several run in parallel without losing which is which. An open thread is tracked-prioritized work, NOT a bottleneck; a cancelled path is a valid tracked outcome, NOT a failure. The exploration offer MUST NOT contain "advances on its own", "without you re-driving each", "minimize involvement", or any automate-the-human-out framing. + > For your EXPLORATION tracks (**{the exploration track names}**): the agent iterates and you steer — spacedock as book-keeping for the parallel threads, tracking each draft/design path and its state (in-flight / paused-by-choice / abandoned) so you run several at once without losing which is which. The **{the cancelled-path count}** cancelled paths are tracked outcomes, not failures; the involvement is the point, so there's no automation here — just structure for the threads. -**For the EXPLORATION tracks (mode=exploration) — offer BOOK-KEEPING, never automation.** These are human-driven creative/exploratory work (writing/content, design exploration, steering an agent that drifts): the involvement IS the point. 
Offer spacedock as structure for the parallel threads — track each draft/path and its state (in-flight / paused-by-choice / abandoned) so several run in parallel without losing which is which. An open thread is tracked-prioritized work, NOT a bottleneck; a cancelled path is a valid tracked outcome, NOT a failure. The exploration offer MUST NOT contain "advances on its own", "without you re-driving each", "minimize involvement", or any automate-the-human-out framing. - > For your EXPLORATION tracks (**{the exploration track names}**): spacedock as book-keeping — track each draft/design path and its state (in-flight / paused-by-choice / abandoned) so you run several in parallel without losing which is which. The **{the cancelled-path count}** cancelled paths are tracked outcomes, not failures; the involvement is the point, so there's no automation here — just structure for the threads. +**For the KNOWLEDGE-WORK tracks (mode=knowledge-work) — offer BATCH BOOK-KEEPING.** These are an intake→process→file→log→close loop (a notes/ops shop, not a code repo): the gates are "confirm this batch / approve this write / scope this draft." Name the loop and offer honest book-keeping for it — track each item from intake to close — NOT an automation pitch and NOT the generic fallback. + > For your KNOWLEDGE-WORK tracks (**{the knowledge-work track names}**): spacedock as book-keeping for the intake → process → file → log → close loop — track each item from intake to close and confirm the batches at the points you already pause, so nothing falls through between runs. **For UNLABELED tracks (mode=unlabeled) — generic book-keeping**, never a guessed automation pitch (the asymmetry favors not mis-offering: a missed automation offer is cheap; a wrong automation pitch at creative work is the misread to avoid). 
-If a project carries BOTH modes, make BOTH offers (they MUST differ — the mechanical one keeps the gate-and-drive pitch; the exploration one carries none of the automate-the-human-out framing). If it carries only one mode, make only that offer. **none** scaffold → the generic spacedock benefit, mode-keyed the same way. Each offer must cite a real scan number (filled track names, gate-pass count, OPEN forks, or cancelled-path count), not a placeholder. +If a project carries MULTIPLE modes, make EACH mode's offer (they MUST differ — the manual one keeps the gate-and-drive pitch; the exploration one leads with iterate/steer and carries none of the automate-the-human-out framing; the knowledge-work one names the batch loop). If it carries only one mode, make only that offer. **none** scaffold → the generic spacedock benefit, mode-keyed the same way. Each offer must cite a real scan number (filled track names, gate-pass count, OPEN forks, or cancelled-path count), not a placeholder. -Then make the offer: +Then make the offer, leading with the plain value: -> Want me to commission a spacedock workflow from this{if both modes: — gated automation for the mechanical tracks, thread book-keeping for the exploration tracks}? +> Want me to commission a spacedock workflow from this — so the repetitive work runs for you and stops only at the calls you'd want to make{if both manual and exploration modes: (gate-and-drive for the manual tracks; thread book-keeping for the exploration tracks)}? On a **yes**, invoke commission in batch mode, supplying inputs derived from the scan (commission already accepts batch design inputs in its first message — see its Batch Mode). Assemble: @@ -228,9 +269,10 @@ On a **no**, stop — the survey stands on its own as an orientation. ## Synthesis guidance - **Project name** = path basename. 
+- **Value & numbers first.** The report leads with the plain `WHAT THIS GIVES YOU` lede + the `BY THE NUMBERS` block, then demotes the mode/track vocabulary to the below-the-fold detail. Lead with what the user GETS, not with how the classifier labels their work. No scratch-reasoning preamble precedes the title. - **Workflow + workstreams: infer them**, primarily from the decisions (the `PROMPTS` are sparse/noisy — secondary). Be honest when a track is one-off or stalled. -- **Decisions + stats are data, not invention.** `OPEN` = still needs the human; lead the report with the true-open forks. The transcript scan can't tell shipped from open — that's what the step-4 repo cross-check is for; drop a fork to "shipped" only on a confident match, and flag the whole frontier `unverified` when there's no repo signal. -- **Work-by-area is identity, not a to-do list.** The `work-by-area` buckets say WHAT this project is (where edits land), by LOGICAL area regardless of physical location — a worktree edit is attributed to its area (a worktree `src/` edit is `src`), so worktree-based product work is not hidden. The lead lists product areas (`kind=product`) by edit count; genuine config (`.claude`/`.beads`/`.git`) and an `` sibling-repo path demote to a footnote (`kind=config`) — still counted, just not the project's identity. Report it separately from the decision frontier (where you stop). -- **Two work modes, two offers.** `mode-classification` labels each track mechanical / exploration / unlabeled. Mechanical tracks (the issue→worktree→PR loop) get the automation offer (gate-and-drive); exploration tracks (creative/content/design steering) get the book-keeping offer (track the parallel threads + their states) — the involvement IS the point, so NO automation pitch for them; an unlabeled track gets generic book-keeping, never a guessed automation pitch. 
+- **Decisions + stats are data, not invention.** `OPEN` = still needs the human; lead `THREADS TO PULL` with the true-open forks framed as the steady-state ("where you are now + what's still open"), plus a proactive prompt — NOT a narration of past decision history. The transcript scan can't tell shipped from open — that's what the step-4 repo cross-check is for; drop a fork to "shipped" only on a confident match, and flag the whole frontier `unverified` when there's no repo signal. +- **Work-by-area is identity, not a to-do list.** The `work-by-area` buckets say WHAT this project is (where edits land), by LOGICAL area regardless of physical location — a worktree edit is attributed to its area (a worktree `src/` edit is `src`), so worktree-based product work is not hidden. The lead lists product areas (`kind=product`) by edit count; genuine config (`.claude`/`.beads`/`.git`) and an `` sibling-repo path demote to a footnote (`kind=config`) — still counted, just not the project's identity. **Branch-aware caveat:** when the inferred workflow is branch-and-merge (worktree → PR → merge) and a config bucket out-edits product, the edit count over-weights directly-edited scaffolding on the working branch and under-counts product code that lands via merged PRs — so caveat the signal (the counted edits are the directly-edited branch; product is under-counted here) and do NOT conclude "scaffolding > product." Report it separately from the decision frontier (where you stop). +- **Work modes, mode-keyed offers.** `mode-classification` labels each track manual / exploration / knowledge-work / unlabeled. 
Manual tracks (the issue→worktree→PR loop — repetitive but substantive, not trivial) get the gate-and-drive offer; exploration tracks (creative/content/design steering) get the iterate/steer + book-keeping offer (track the parallel threads + their states) — the involvement IS the point, so NO automation pitch for them and do NOT lead with gates; knowledge-work tracks (the intake→process→file→log→close loop) get batch book-keeping (name the loop, confirm the batches); an unlabeled track gets generic book-keeping, never a guessed automation pitch. The word the report renders is `manual`, not `mechanical` (reserve "mechanical" for genuinely trivial edits). - **Fill every slot, never invent.** Every `{slot}` in the report and the comparison comes from the step-2 numbers; a literal `{slot}` shown to the user is a bug. If a section's signal is empty (no OPEN decisions, no interruptions, no edits), say the run found none — never dress an empty section up as "no decisions." - **Claude body + a Codex body section.** The Claude body (workflow, workstreams, decisions, work-by-area, scaffold) is built from Claude history. Codex is surfaced too, as its own section (step 4) from the `codex-scoped` set: the workdir-attributed count + the workstream clusters + the activity tally (Gemini and per-file Codex work-by-area remain deferred follow-ups). A repo whose ONLY history is Codex still reports "no agent history" at the `scoping=0` stop (the Claude body has nothing); the Codex section renders alongside a non-empty Claude body, not in place of it. From 45f2d9ed8f50dd75a90a0f9119879031d2b1591c Mon Sep 17 00:00:00 2001 From: CL Kao Date: Sat, 13 Jun 2026 00:36:44 -0700 Subject: [PATCH 3/5] survey: apply comm-officer prose polish to report lede + mode offers Clarity-only edits (Strunk): split run-ons in the WHAT THIS GIVES YOU lede and the manual/exploration/knowledge-work offers, tighten 'roughly N times in this window', 'these passed N times'. 
State-list notation kept in slash form to match the report's state glyphs. No {slot} tokens or structure changed. Co-Authored-By: Claude Opus 4.8 (1M context) --- skills/survey/SKILL.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/skills/survey/SKILL.md b/skills/survey/SKILL.md index 3a993d2b..3692ab62 100644 --- a/skills/survey/SKILL.md +++ b/skills/survey/SKILL.md @@ -165,9 +165,9 @@ SpaceDock survey — your last {N} days ← {N} from sc (recent-window snapshot · agent logs only{if blank_cwd>0: · {blank_cwd} sessions had no working dir, not placed}) WHAT THIS GIVES YOU - {plain language, no jargon: "You steer your agents by hand ~{interruptions} times over this - window. About {the manual/repeated share} are the same few moves. A SpaceDock workflow can - run the repetitive parts for you and stop only where you'd want a say."} + {plain language, no jargon: "You steer your agents by hand roughly {interruptions} times in + this window. About {the manual/repeated share} are the same few moves repeated. A SpaceDock + workflow can run those repetitive parts for you, stopping only where you'd want a say."} BY THE NUMBERS {interruptions} hand-steering interruptions ← decisions + veto markers (the {V} total) @@ -239,13 +239,13 @@ After the report, offer spacedock — leading with plain value, then keyed to th **For the MANUAL tracks (mode=manual) — offer GATE-AND-DRIVE.** These are disciplined routine execution (the issue→worktree→PR loop, routine implementation): gate the crucial decisions and let the agent drive the loop between gates. Keep the gate-and-autonomy pitch — it is CORRECT for these. State it tied to the scan (the manual tracks' names + their gate-pass count or the interruption count). The per-scaffold flavor sharpens the offer: - **superpowers** maps its disciplines (brainstorming → writing-plans → executing-plans → subagent-driven-development) to stages with the interruption points made EXPLICIT gates. 
- **gsd / get-shit-done** maps its fixed phases to stages + durable entity state so several work items move concurrently, pausing only at gates. - > For your MANUAL tracks (**{the manual track names}**): a spacedock workflow that runs the repetitive loop for you and stops only at the calls you'd want to make — these passed **{the gate-pass count}**, so the agent can carry them between your gates. + > For your MANUAL tracks (**{the manual track names}**): a spacedock workflow that runs the repetitive loop for you and stops only at the calls you'd want to make. These passed **{the gate-pass count}** times, so the agent can carry them between your gates. **For the EXPLORATION tracks (mode=exploration) — lead with ITERATE/STEER, then BOOK-KEEPING; never automation.** These are human-driven creative/exploratory work (writing/content, design exploration, steering an agent that drifts): the involvement IS the point. The offer must speak to the iterate/explore loop FIRST — the agent iterates and you steer; an approval gate is ONE shape that loop can take, not the headline — do NOT lead with "explicit approval gates" for these. Then offer spacedock as structure for the parallel threads: track each draft/path and its state (in-flight / paused-by-choice / abandoned) so several run in parallel without losing which is which. An open thread is tracked-prioritized work, NOT a bottleneck; a cancelled path is a valid tracked outcome, NOT a failure. The exploration offer MUST NOT contain "advances on its own", "without you re-driving each", "minimize involvement", or any automate-the-human-out framing. - > For your EXPLORATION tracks (**{the exploration track names}**): the agent iterates and you steer — spacedock as book-keeping for the parallel threads, tracking each draft/design path and its state (in-flight / paused-by-choice / abandoned) so you run several at once without losing which is which. 
The **{the cancelled-path count}** cancelled paths are tracked outcomes, not failures; the involvement is the point, so there's no automation here — just structure for the threads. + > For your EXPLORATION tracks (**{the exploration track names}**): you steer while the agent iterates. Spacedock acts as book-keeping for the parallel threads, tracking each draft/design path and its state (in-flight / paused-by-choice / abandoned) so you run several at once without losing which is which. The **{the cancelled-path count}** cancelled paths are tracked outcomes, not failures. The involvement is the point; there is no automation here, only structure for the threads. **For the KNOWLEDGE-WORK tracks (mode=knowledge-work) — offer BATCH BOOK-KEEPING.** These are an intake→process→file→log→close loop (a notes/ops shop, not a code repo): the gates are "confirm this batch / approve this write / scope this draft." Name the loop and offer honest book-keeping for it — track each item from intake to close — NOT an automation pitch and NOT the generic fallback. - > For your KNOWLEDGE-WORK tracks (**{the knowledge-work track names}**): spacedock as book-keeping for the intake → process → file → log → close loop — track each item from intake to close and confirm the batches at the points you already pause, so nothing falls through between runs. + > For your KNOWLEDGE-WORK tracks (**{the knowledge-work track names}**): spacedock acts as book-keeping for the intake → process → file → log → close loop. Track each item from intake to close and confirm the batches at the points where you already pause, so nothing falls through between runs. **For UNLABELED tracks (mode=unlabeled) — generic book-keeping**, never a guessed automation pitch (the asymmetry favors not mis-offering: a missed automation offer is cheap; a wrong automation pitch at creative work is the misread to avoid). 
From f3f25cde6ee5f117b349effd80d198a27400735b Mon Sep 17 00:00:00 2001 From: CL Kao Date: Sat, 13 Jun 2026 08:36:20 -0700 Subject: [PATCH 4/5] =?UTF-8?q?survey:=20cycle-1=20fold=20=E2=80=94=20know?= =?UTF-8?q?ledge-work=20offer=20reframe=20+=20spacedock-incumbent=20scaffo?= =?UTF-8?q?ld=20detection?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (A) Knowledge-work offer reframe (R5/R6): the shipped offer framed knowledge-work as passive book-keeping and disclaimed automation — backwards. Rewrite the step-4 knowledge-work mode-keyed offer to a per-thread tracker that AUTO-RUNS the processing the user already built: long-lived per-entity threads (per recurring counterpart / advisory topic / one-off interview-or-strategy pass); a new entry auto-runs the user's existing processing/commentary skills (their index/update skill becomes ONE automated step), stopping only at the judgment calls (intake/filing + commentary emphasis). Drop the "book-keeping" / "no automation" framing; mirror in the synthesis-guidance mode-keyed-offers bullet. Proven by a live drive over the knowledge-work fixture (notes-ops → knowledge-work; rendered offer names per-entity threads + auto-running existing skills + judgment-call gates, no "no automation"). (B) Spacedock-incumbent scaffold detection (R7; folds seed dyxqywnwb4c3zwb3pka1p6s0): the DB tally excludes WHERE family <> 'spacedock' (stops the survey's own self-call reading every repo as spacedock), which also hid spacedock from SCAFFOLD on a genuine spacedock repo. Add a spacedock FILE-PROBE to step 3 — a runnable spacedock_incumbent() bash function matching .spacedock-state/ / a workflow README with spacedock frontmatter / _mods/ — so SCAFFOLD names spacedock WHEN a workflow is on disk, while KEEPING the tally exclusion. The file-probe distinguishes the genuine incumbent from the self-call. 
New TestSurveyScaffoldIncumbentProbe extracts the function from SKILL.md and executes it over a committed testdata/scaffold/ fixture pair (workflow-on-disk → spacedock; survey-self-only → nothing) — outcome from on-disk state, not a SKILL.md grep. Live drive over this repo (a genuine spacedock repo): probe now names spacedock via docs/dev/.spacedock-state. 20/20 survey query+probe subtests; 1255/16 full go test; go vet clean. Existing R1-R6 behavior + KEEP-SIGNAL unchanged. Co-Authored-By: Claude Opus 4.8 (1M context) --- skills/integration/survey_probe_test.go | 80 +++++++++++++++++++ .../dev/.spacedock-state/sample-entity.md | 7 ++ .../spacedock-on-disk/docs/dev/README.md | 11 +++ .../survey-self-only/.claude/skills/.gitkeep | 0 .../scaffold/survey-self-only/README.md | 3 + skills/survey/SKILL.md | 22 ++++- 6 files changed, 119 insertions(+), 4 deletions(-) create mode 100644 skills/integration/testdata/scaffold/spacedock-on-disk/docs/dev/.spacedock-state/sample-entity.md create mode 100644 skills/integration/testdata/scaffold/spacedock-on-disk/docs/dev/README.md create mode 100644 skills/integration/testdata/scaffold/survey-self-only/.claude/skills/.gitkeep create mode 100644 skills/integration/testdata/scaffold/survey-self-only/README.md diff --git a/skills/integration/survey_probe_test.go b/skills/integration/survey_probe_test.go index 7c1546ed..0ce1c5b3 100644 --- a/skills/integration/survey_probe_test.go +++ b/skills/integration/survey_probe_test.go @@ -110,3 +110,83 @@ func TestSurveyInstallProbe(t *testing.T) { } }) } + +// extractScaffoldIncumbentFn reads skills/survey/SKILL.md and returns the runnable +// `spacedock_incumbent()` bash function from the step-3 scaffold file-probe (the artifact +// under test). Extraction starts at the `spacedock_incumbent() {` line and runs to the +// closing two-space `}` line of the block. 
The test EXECUTES the shipped function rather +// than a copy, so the SKILL.md probe and this test cannot drift; removing the spacedock +// file-probe fails extraction here. +func extractScaffoldIncumbentFn(t *testing.T) string { + t.Helper() + path := filepath.Join(repoRoot(t), "skills", "survey", "SKILL.md") + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read SKILL.md %s: %v", path, err) + } + lines := strings.Split(string(data), "\n") + var body []string + inFn := false + for _, line := range lines { + if strings.Contains(line, "spacedock_incumbent() {") { + inFn = true + } + if inFn { + body = append(body, line) + if strings.TrimRight(line, " ") == " }" { // the function's closing brace (two-space indent) + break + } + } + } + if len(body) == 0 { + t.Fatalf("expected a runnable spacedock_incumbent() function in SKILL.md step-3 scaffold probe, found none") + } + return strings.Join(body, "\n") +} + +// runScaffoldProbe runs the extracted spacedock_incumbent function with cwd set to dir and +// returns its trimmed stdout. The probe inspects the filesystem under cwd, so dir IS the +// fixture condition — the outcome derives entirely from the fixture's on-disk state. +func runScaffoldProbe(t *testing.T, fn, dir string) string { + t.Helper() + bash, err := exec.LookPath("bash") + if err != nil { + t.Skip("bash not on PATH; the scaffold probe is a bash function") + } + cmd := exec.Command(bash, "-c", fn+"\nspacedock_incumbent") + cmd.Dir = dir + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("run scaffold probe in %s: %v\n%s", dir, err, out) + } + return strings.TrimRight(string(out), "\n") +} + +// TestSurveyScaffoldIncumbentProbe (cycle-1 B): the spacedock-incumbent file-probe behavior +// test. 
It runs the exact step-3 `spacedock_incumbent` function from SKILL.md over a +// committed FIXTURE PAIR of directories: (i) a repo with a spacedock workflow on disk (a +// .spacedock-state checkout + a workflow README with spacedock frontmatter) must echo +// "spacedock"; (ii) a survey-self-only repo (no workflow on disk — the kind a `spacedock:survey` +// self-call leaves) must echo nothing. The oracle is the two fixtures' ON-DISK STATE, never +// a SKILL.md grep: the file-probe is what distinguishes a genuine incumbent from the survey's +// own self-call, which the DB tally's `family <> 'spacedock'` exclusion deliberately drops. +func TestSurveyScaffoldIncumbentProbe(t *testing.T) { + fn := extractScaffoldIncumbentFn(t) + base := filepath.Join("testdata", "scaffold") + + // (i) a spacedock workflow on disk → named spacedock. + t.Run("workflow-on-disk", func(t *testing.T) { + got := runScaffoldProbe(t, fn, filepath.Join(base, "spacedock-on-disk")) + if got != "spacedock" { + t.Errorf("a repo with a spacedock workflow on disk must be named spacedock, got %q", got) + } + }) + + // (ii) survey-self-only, no workflow on disk → NOT named spacedock (the false-positive guard). + t.Run("survey-self-only", func(t *testing.T) { + got := runScaffoldProbe(t, fn, filepath.Join(base, "survey-self-only")) + if got != "" { + t.Errorf("a survey-self-only repo with no workflow on disk must NOT be named spacedock, got %q", got) + } + }) +} diff --git a/skills/integration/testdata/scaffold/spacedock-on-disk/docs/dev/.spacedock-state/sample-entity.md b/skills/integration/testdata/scaffold/spacedock-on-disk/docs/dev/.spacedock-state/sample-entity.md new file mode 100644 index 00000000..2dd5c3f7 --- /dev/null +++ b/skills/integration/testdata/scaffold/spacedock-on-disk/docs/dev/.spacedock-state/sample-entity.md @@ -0,0 +1,7 @@ +--- +id: sampleentity +title: A sample entity +status: ideation +--- + +Body. 
diff --git a/skills/integration/testdata/scaffold/spacedock-on-disk/docs/dev/README.md b/skills/integration/testdata/scaffold/spacedock-on-disk/docs/dev/README.md new file mode 100644 index 00000000..428ebf12 --- /dev/null +++ b/skills/integration/testdata/scaffold/spacedock-on-disk/docs/dev/README.md @@ -0,0 +1,11 @@ +--- +commissioned-by: spacedock +stages: + - ideation + - implementation + - validation +--- + +# Dev workflow + +A commissioned spacedock workflow. diff --git a/skills/integration/testdata/scaffold/survey-self-only/.claude/skills/.gitkeep b/skills/integration/testdata/scaffold/survey-self-only/.claude/skills/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/skills/integration/testdata/scaffold/survey-self-only/README.md b/skills/integration/testdata/scaffold/survey-self-only/README.md new file mode 100644 index 00000000..f16b2bed --- /dev/null +++ b/skills/integration/testdata/scaffold/survey-self-only/README.md @@ -0,0 +1,3 @@ +# Some project + +An ordinary repo. Someone ran `spacedock:survey` here once, but never commissioned a workflow. diff --git a/skills/survey/SKILL.md b/skills/survey/SKILL.md index 3692ab62..1358f2b4 100644 --- a/skills/survey/SKILL.md +++ b/skills/survey/SKILL.md @@ -129,12 +129,26 @@ Recognize the scaffold from TWO signals and reconcile them — a file probe (wha **File probe — multi-label, not a single winner.** Probe each scaffold INDEPENDENTLY and name EVERY match; report `none` only when no probe matched (the old single-winner if-ladder hid co-installed scaffolds): +- **spacedock** — a spacedock WORKFLOW is on disk: any of a `.spacedock-state/` dir (a workflow's state checkout — including the split-root `docs/**/.spacedock-state`), a workflow README carrying spacedock frontmatter (a `README.md` with `commissioned-by:` or `stages:` keys), or a `_mods/` dir (commissioned workflow scaffolding). 
The DB tally EXCLUDES the `spacedock` family (see Behavioral tally), so a genuine incumbent is recognized HERE, by the on-disk workflow — never by the survey's own `spacedock:survey` self-call. Probe it with (run from the repo root): + ```bash + # spacedock incumbent: a workflow on disk (state checkout, workflow README frontmatter, or _mods/). + # A `spacedock:survey` self-call leaves NO such file, so a survey-only repo prints nothing. + spacedock_incumbent() { # echoes "spacedock" iff a workflow is on disk + if find . -type d \( -name '.spacedock-state' -o -name '_mods' \) -print -quit 2>/dev/null | grep -q .; then + echo spacedock; return + fi + if find . -type f -name 'README.md' -print0 2>/dev/null \ + | xargs -0 grep -lE '^(commissioned-by|stages):' 2>/dev/null | grep -q .; then + echo spacedock; return + fi + } + ``` - **superpowers** — `.claude/skills/superpowers` exists, `superpowers` appears in `.claude-plugin/`, or a superpowers discipline skill dir is present (`.claude/skills/{brainstorming,writing-plans,executing-plans,subagent-driven-development,…}`); - **gsd / get-shit-done** — a `.claude/skills/gsd` or `.claude/skills/get-shit-done` dir, a `.claude/commands/gsd` dir, or a `GSD.md` / `gsd.md` / `.gsd` file; - **similar / unknown** — any other `.claude/skills` or `.claude/commands` tree (name the dirs you found); - **none** — none of the above is present on disk. -**Behavioral tally.** The `scaffold-usage` rows are a `family → invocations` tally normalized from `tool_calls.skill_name` (`superpowers:brainstorming` and the bare `running-research-spikes` both fold to family `superpowers`); the `spacedock` family is excluded because survey/ensign self-invocation otherwise dominates and would make every repo read as "uses spacedock". 
+**Behavioral tally.** The `scaffold-usage` rows are a `family → invocations` tally normalized from `tool_calls.skill_name` (`superpowers:brainstorming` and the bare `running-research-spikes` both fold to family `superpowers`); the `spacedock` family is excluded from the TALLY because survey/ensign self-invocation otherwise dominates and would make every surveyed repo read as "uses spacedock" (the false positive). That exclusion is the tally's; it does NOT hide a genuine spacedock incumbent — the spacedock FILE-PROBE above is what distinguishes a real on-disk workflow from the survey's own self-call. So a repo whose ONLY spacedock signal is the `spacedock:survey` self-call (no workflow on disk) is NOT named spacedock; a repo with a workflow on disk IS. **Join and state the fact.** For each family appearing in either signal, state two observed facts plainly: its invocation count (the behavioral tally) and whether it is checked in on disk (the file probe). Do not narrate HOW the family was discovered (behavior vs files) — state only the usage + on-disk fact. For a family invoked but absent from disk, state the count and that it is not checked in. For example: @@ -244,8 +258,8 @@ After the report, offer spacedock — leading with plain value, then keyed to th **For the EXPLORATION tracks (mode=exploration) — lead with ITERATE/STEER, then BOOK-KEEPING; never automation.** These are human-driven creative/exploratory work (writing/content, design exploration, steering an agent that drifts): the involvement IS the point. The offer must speak to the iterate/explore loop FIRST — the agent iterates and you steer; an approval gate is ONE shape that loop can take, not the headline — do NOT lead with "explicit approval gates" for these. Then offer spacedock as structure for the parallel threads: track each draft/path and its state (in-flight / paused-by-choice / abandoned) so several run in parallel without losing which is which. 
An open thread is tracked-prioritized work, NOT a bottleneck; a cancelled path is a valid tracked outcome, NOT a failure. The exploration offer MUST NOT contain "advances on its own", "without you re-driving each", "minimize involvement", or any automate-the-human-out framing. > For your EXPLORATION tracks (**{the exploration track names}**): you steer while the agent iterates. Spacedock acts as book-keeping for the parallel threads, tracking each draft/design path and its state (in-flight / paused-by-choice / abandoned) so you run several at once without losing which is which. The **{the cancelled-path count}** cancelled paths are tracked outcomes, not failures. The involvement is the point; there is no automation here, only structure for the threads. -**For the KNOWLEDGE-WORK tracks (mode=knowledge-work) — offer BATCH BOOK-KEEPING.** These are an intake→process→file→log→close loop (a notes/ops shop, not a code repo): the gates are "confirm this batch / approve this write / scope this draft." Name the loop and offer honest book-keeping for it — track each item from intake to close — NOT an automation pitch and NOT the generic fallback. - > For your KNOWLEDGE-WORK tracks (**{the knowledge-work track names}**): spacedock acts as book-keeping for the intake → process → file → log → close loop. Track each item from intake to close and confirm the batches at the points where you already pause, so nothing falls through between runs. +**For the KNOWLEDGE-WORK tracks (mode=knowledge-work) — offer a PER-THREAD TRACKER THAT AUTO-RUNS THE PROCESSING THEY ALREADY BUILT.** These are a recurring intake→process→commentary loop where the processing is already skill-shaped (an index/update skill, a commentary/summary skill the user wrote). Do NOT pitch this as passive book-keeping and do NOT disclaim automation — that is backwards for this class. 
Offer long-lived PER-ENTITY threads — one per recurring counterpart, one per advisory topic, plus one-off threads for interviews / strategy passes — and when a new entry lands on a thread, the workflow AUTO-RUNS the user's existing processing/commentary skills (their index/update skill becomes ONE automated step, not the whole job), stopping only at the judgment calls (intake/filing and what the commentary should emphasize). Keep the `knowledge-work` label, but the OFFER is for the recurring loop whose processing is already automatable; a knowledge-work track with no skill-shaped processing degrades to the generic book-keeping offer. + > For your KNOWLEDGE-WORK tracks (**{the knowledge-work track names}**): a spacedock workflow with a long-lived thread per entity — one per recurring counterpart, one per advisory topic, plus one-offs for interviews and strategy passes. When a new entry lands, it auto-runs the processing you already built (your index/update and commentary skills become automated steps), and stops only at the judgment calls — what to intake and file, and what the commentary should emphasize. **For UNLABELED tracks (mode=unlabeled) — generic book-keeping**, never a guessed automation pitch (the asymmetry favors not mis-offering: a missed automation offer is cheap; a wrong automation pitch at creative work is the misread to avoid). @@ -273,6 +287,6 @@ On a **no**, stop — the survey stands on its own as an orientation. - **Workflow + workstreams: infer them**, primarily from the decisions (the `PROMPTS` are sparse/noisy — secondary). Be honest when a track is one-off or stalled. - **Decisions + stats are data, not invention.** `OPEN` = still needs the human; lead `THREADS TO PULL` with the true-open forks framed as the steady-state ("where you are now + what's still open"), plus a proactive prompt — NOT a narration of past decision history. 
The transcript scan can't tell shipped from open — that's what the step-4 repo cross-check is for; drop a fork to "shipped" only on a confident match, and flag the whole frontier `unverified` when there's no repo signal. - **Work-by-area is identity, not a to-do list.** The `work-by-area` buckets say WHAT this project is (where edits land), by LOGICAL area regardless of physical location — a worktree edit is attributed to its area (a worktree `src/` edit is `src`), so worktree-based product work is not hidden. The lead lists product areas (`kind=product`) by edit count; genuine config (`.claude`/`.beads`/`.git`) and an `` sibling-repo path demote to a footnote (`kind=config`) — still counted, just not the project's identity. **Branch-aware caveat:** when the inferred workflow is branch-and-merge (worktree → PR → merge) and a config bucket out-edits product, the edit count over-weights directly-edited scaffolding on the working branch and under-counts product code that lands via merged PRs — so caveat the signal (the counted edits are the directly-edited branch; product is under-counted here) and do NOT conclude "scaffolding > product." Report it separately from the decision frontier (where you stop). -- **Work modes, mode-keyed offers.** `mode-classification` labels each track manual / exploration / knowledge-work / unlabeled. Manual tracks (the issue→worktree→PR loop — repetitive but substantive, not trivial) get the gate-and-drive offer; exploration tracks (creative/content/design steering) get the iterate/steer + book-keeping offer (track the parallel threads + their states) — the involvement IS the point, so NO automation pitch for them and do NOT lead with gates; knowledge-work tracks (the intake→process→file→log→close loop) get batch book-keeping (name the loop, confirm the batches); an unlabeled track gets generic book-keeping, never a guessed automation pitch. 
The word the report renders is `manual`, not `mechanical` (reserve "mechanical" for genuinely trivial edits). +- **Work modes, mode-keyed offers.** `mode-classification` labels each track manual / exploration / knowledge-work / unlabeled. Manual tracks (the issue→worktree→PR loop — repetitive but substantive, not trivial) get the gate-and-drive offer; exploration tracks (creative/content/design steering) get the iterate/steer + book-keeping offer (track the parallel threads + their states) — the involvement IS the point, so NO automation pitch for them and do NOT lead with gates; knowledge-work tracks (the recurring intake→process→commentary loop, where the processing is already skill-shaped) get a per-entity tracker that AUTO-RUNS the user's existing processing/commentary skills and stops only at the judgment calls — NOT passive book-keeping and NOT an automation disclaimer; an unlabeled track gets generic book-keeping, never a guessed automation pitch. The word the report renders is `manual`, not `mechanical` (reserve "mechanical" for genuinely trivial edits). - **Fill every slot, never invent.** Every `{slot}` in the report and the comparison comes from the step-2 numbers; a literal `{slot}` shown to the user is a bug. If a section's signal is empty (no OPEN decisions, no interruptions, no edits), say the run found none — never dress an empty section up as "no decisions." - **Claude body + a Codex body section.** The Claude body (workflow, workstreams, decisions, work-by-area, scaffold) is built from Claude history. Codex is surfaced too, as its own section (step 4) from the `codex-scoped` set: the workdir-attributed count + the workstream clusters + the activity tally (Gemini and per-file Codex work-by-area remain deferred follow-ups). A repo whose ONLY history is Codex still reports "no agent history" at the `scoping=0` stop (the Claude body has nothing); the Codex section renders alongside a non-empty Claude body, not in place of it. 
From 31f6cfb0a878647989701c2935443736f4787e3b Mon Sep 17 00:00:00 2001 From: CL Kao Date: Sat, 13 Jun 2026 09:56:07 -0700 Subject: [PATCH 5/5] =?UTF-8?q?survey:=20cycle-2=20fold=20=E2=80=94=20name?= =?UTF-8?q?=20the=20specific=20TYPE=20of=20knowledge=20work=20in=20the=20r?= =?UTF-8?q?eport=20body?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The render leaned on the bare archetype label "knowledge-work" (HOW YOU WORK line, WORKSTREAMS mode column) — too vague. Now NAME THE SPECIFIC TYPE, synthesized from the workstream names + areas (+ the user's checked-in skills), so "knowledge-work" never stands alone: - SKILL.md step 4 HOW YOU WORK: the knowledge-work line characterizes the actual kinds (e.g. "People 1-1s & team assessment, … memo intake & filing — a knowledge-work loop run mostly autonomously"), not a bare "A knowledge-work loop". - SKILL.md step 4 WORKSTREAMS mode column: qualify the bare label with the track's specific type (e.g. `knowledge-work · 1-1s & team assessment`). The mode-classification VALUE stays `knowledge-work` (the underlying class); only the user-facing characterization names the type. - synthesis-guidance: new "Name the SPECIFIC TYPE of knowledge work" bullet. No sub-type classifier — the type is synthesized from the workstream/area signal the survey already has. Seed a SECOND distinct knowledge-work track (client-1on1s: people 1-1s & assessment) beside notes-ops so the render demonstrably names ≥2 DISTINCT types; mode-classification asserts both → knowledge-work; scoping 15→17. Proven by a live drive over the two-track fixture: HOW YOU WORK + WORKSTREAMS name distinct specific types per track, knowledge-work never standing alone. R1-R6 + cycle-1 (offer reframe, AC-7 scaffold probe) intact. 20/20 query+probe subtests; 1255/16 full go test; go vet clean. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- skills/integration/survey_queries_test.go | 21 +++++++---- .../testdata/survey/fixture-sessions.sql | 36 +++++++++++++++++++ skills/survey/SKILL.md | 10 ++++-- 3 files changed, 58 insertions(+), 9 deletions(-) diff --git a/skills/integration/survey_queries_test.go b/skills/integration/survey_queries_test.go index 4ddd9c6d..c19cc621 100644 --- a/skills/integration/survey_queries_test.go +++ b/skills/integration/survey_queries_test.go @@ -163,9 +163,10 @@ func TestSurveyQuerySmoke(t *testing.T) { // the in-repo Claude sessions (cwd AT root, subdir, worktree, plus the F/G worktree-shape // + the mode-classification track sessions, all under the prefix) and EXCLUDES the // blank-cwd session, the out-of-repo session, the dispatched SUBAGENT sessions (file_path - // under %/subagents/%), and ALL the codex rows. The fixture has 15 in-repo non-subagent + // under %/subagents/%), and ALL the codex rows. The fixture has 17 in-repo non-subagent // Claude sessions: A,B,C + WT + issue-feed×2 + landing-copy×2 + mixed-bag (9) + the two - // dispatch parents + two decision-no-followup + two knowledge-work `notes-ops` (6). + // dispatch parents + two decision-no-followup + two knowledge-work `notes-ops` (6) + two + // knowledge-work `client-1on1s` (2, the cycle-2 second knowledge-work track). 
t.Run("scoping", func(t *testing.T) { rows := runQuery(t, db, queries["scoping"]) if len(rows) != 1 { @@ -175,8 +176,8 @@ func TestSurveyQuerySmoke(t *testing.T) { if len(fields) != 3 { t.Fatalf("scoping row should have 3 fields (sessions|blank_cwd|span) — folded_keys is dropped, got: %q", rows[0]) } - if fields[0] != "15" { - t.Errorf("the cwd-prefix should count 15 in-repo non-subagent Claude sessions, got sessions=%q", fields[0]) + if fields[0] != "17" { + t.Errorf("the cwd-prefix should count 17 in-repo non-subagent Claude sessions, got sessions=%q", fields[0]) } if fields[1] != "0" { t.Errorf("the blank-cwd Claude session is outside the prefix and must not count, got blank_cwd=%q", fields[1]) @@ -302,15 +303,15 @@ func TestSurveyQuerySmoke(t *testing.T) { }) // no-union (AC-2c): the added Codex rows must NOT inflate the Claude scope. The scoping - // query is asserted to 15 above (the Claude-only in-repo count), proving Codex stays out + // query is asserted to 17 above (the Claude-only in-repo count), proving Codex stays out // of the Claude `sessions` count — a flagged presence, never a silent project union. 
t.Run("codex-not-folded-into-scope", func(t *testing.T) { rows := runQuery(t, db, queries["scoping"]) if len(rows) != 1 { t.Fatalf("scoping should return one summary row, got %d: %v", len(rows), rows) } - if sessions := strings.Split(rows[0], "|")[0]; sessions != "15" { - t.Errorf("the Codex rows must not be folded into the Claude scope; scoping.sessions should stay 15, got %q", sessions) + if sessions := strings.Split(rows[0], "|")[0]; sessions != "17" { + t.Errorf("the Codex rows must not be folded into the Claude scope; scoping.sessions should stay 17, got %q", sessions) } }) @@ -436,6 +437,12 @@ func TestSurveyQuerySmoke(t *testing.T) { if mode["notes-ops"] != "knowledge-work" { t.Errorf("the intake→process→file→log→close + content/ops-edits track should classify knowledge-work, got %q in %v", mode["notes-ops"], mode) } + // cycle 2: a SECOND distinct knowledge-work track (client-1on1s) so the render names ≥2 + // specific types. Both classify knowledge-work (the CLASS stays); the render qualifies + // each with its own workstream-derived type (notes-ops vs 1-1s & assessment). 
+ if mode["client-1on1s"] != "knowledge-work" { + t.Errorf("the second knowledge-work track (people 1-1s & assessment) should classify knowledge-work, got %q in %v", mode["client-1on1s"], mode) + } if mode["mixed-bag"] != "unlabeled" { t.Errorf("a neither-dominant track must stay unlabeled (generic book-keeping, never a guessed automation pitch), got %q in %v", mode["mixed-bag"], mode) } diff --git a/skills/integration/testdata/survey/fixture-sessions.sql b/skills/integration/testdata/survey/fixture-sessions.sql index 8dffa741..d61e16c5 100644 --- a/skills/integration/testdata/survey/fixture-sessions.sql +++ b/skills/integration/testdata/survey/fixture-sessions.sql @@ -577,3 +577,39 @@ INSERT INTO messages (id, session_id, ordinal, role, content) VALUES (111, 'claude:c6111111-0000-0000-0000-000000000001', 2, 'user', 'then file the ones that are ready'), (112, 'claude:c6222222-0000-0000-0000-000000000002', 1, 'user', 'process the batch we queued'), (113, 'claude:c6222222-0000-0000-0000-000000000002', 2, 'user', 'log the run and close out the day'); + +-- ============================================================================ +-- KNOWLEDGE-WORK track `client-1on1s` (cycle 2 / AC-5 specific-type). A SECOND, distinct +-- knowledge-work track so the render can demonstrably NAME multiple specific types — here +-- people 1-1s & assessment, alongside `notes-ops`. Same knowledge-work signature (intake→ +-- process→file→log→close loop markers + content/ops `.md`+`.json` edits + gate-pass batch +-- confirm + zero veto + no issue→PR loop), so mode-classification emits `knowledge-work`; +-- its distinct workstream NAME (`client-1on1s`) + areas (`people/`, `assessments/`) are the +-- specific-type signal the HOW YOU WORK / WORKSTREAMS render synthesizes from. 
+-- ============================================================================ +INSERT INTO sessions VALUES + ('claude:c7111111-0000-0000-0000-000000000001', 'proj', 'claude', + '/repo/proj', 'client-1on1s', + '/u/.claude/projects/-repo-proj/c7111111.jsonl', + '2026-06-11', '2026-06-11', 'Intake the 1-1 notes and process the team assessment.', 5, 2, + NULL, ''), + ('claude:c7222222-0000-0000-0000-000000000002', 'proj', 'claude', + '/repo/proj', 'client-1on1s', + '/u/.claude/projects/-repo-proj/c7222222.jsonl', + '2026-06-11', '2026-06-11', 'File the assessment and log the follow-ups.', 4, 2, + NULL, ''); +INSERT INTO tool_calls (id, session_id, tool_name, input_json, result_content) VALUES + (94, 'claude:c7111111-0000-0000-0000-000000000001', 'AskUserQuestion', + '{"questions":[{"header":"Assessment scope","question":"Process this batch of 1-1 notes now?"}]}', + 'Your questions have been answered: "Process this batch of 1-1 notes now?"="yes"'), + (95, 'claude:c7111111-0000-0000-0000-000000000001', 'Write', + '{"file_path":"/repo/proj/people/2026-06-11-1on1.md"}', NULL), + (96, 'claude:c7222222-0000-0000-0000-000000000002', 'Write', + '{"file_path":"/repo/proj/assessments/team.md"}', NULL), + (97, 'claude:c7222222-0000-0000-0000-000000000002', 'Edit', + '{"file_path":"/repo/proj/people/roster.json"}', NULL); +INSERT INTO messages (id, session_id, ordinal, role, content) VALUES + (114, 'claude:c7111111-0000-0000-0000-000000000001', 1, 'user', 'intake the 1-1 notes from today'), + (115, 'claude:c7111111-0000-0000-0000-000000000001', 2, 'user', 'then process the team assessment'), + (116, 'claude:c7222222-0000-0000-0000-000000000002', 1, 'user', 'file the assessment write-up'), + (117, 'claude:c7222222-0000-0000-0000-000000000002', 2, 'user', 'log the follow-ups and close out'); diff --git a/skills/survey/SKILL.md b/skills/survey/SKILL.md index 1358f2b4..f150b4f7 100644 --- a/skills/survey/SKILL.md +++ b/skills/survey/SKILL.md @@ -194,7 +194,12 @@ HOW YOU WORK {the 
inferred loop as an arrow chain} — {one honest line naming the dominant mode in PLAIN terms: "Mostly manual, repetitive tracks (not trivial — they take real work)." for manual; "Mostly exploratory — you steer an iterating agent." for exploration; - "A knowledge-work loop: intake → process → file → log → close." for knowledge-work} + for knowledge-work, NAME THE SPECIFIC TYPE(S) synthesized from the workstream names + areas + (+ the user's checked-in skills) — never the bare archetype label alone. e.g. + "People 1-1s & team assessment, positioning, interviews, and strategy mapping — a + knowledge-work loop run mostly autonomously." Name the actual kinds the workstreams show + (each track's WORKSTREAMS-cluster name is the source); "knowledge-work" appears only as the + trailing class, never standing alone} ↓ full analysis: modes, work-by-area, what this can't see @@ -215,7 +220,7 @@ RECENT DECISIONS (answered or shipped) {the rest: header — short question} WORKSTREAMS mode - {cluster the decisions + prompts into tracks; one line each, status glyph + the mode-classification label (manual / exploration / knowledge-work / unlabeled) per track} + {cluster the decisions + prompts into tracks; one line each, status glyph + the mode-classification label (manual / exploration / knowledge-work / unlabeled) per track. For a knowledge-work track, do NOT print the bare label — qualify it with the track's specific type drawn from its workstream name + areas, e.g. "knowledge-work · 1-1s & team assessment" or "knowledge-work · strategy mapping". The label class stays `knowledge-work`; the qualifier names the type.} WORK BY AREA (logical areas; worktree edits attributed to their area — F) {the product work-by-area buckets (kind=product), by edit count: area — {edits}} @@ -288,5 +293,6 @@ On a **no**, stop — the survey stands on its own as an orientation. 
- **Decisions + stats are data, not invention.** `OPEN` = still needs the human; lead `THREADS TO PULL` with the true-open forks framed as the steady-state ("where you are now + what's still open"), plus a proactive prompt — NOT a narration of past decision history. The transcript scan can't tell shipped from open — that's what the step-4 repo cross-check is for; drop a fork to "shipped" only on a confident match, and flag the whole frontier `unverified` when there's no repo signal. - **Work-by-area is identity, not a to-do list.** The `work-by-area` buckets say WHAT this project is (where edits land), by LOGICAL area regardless of physical location — a worktree edit is attributed to its area (a worktree `src/` edit is `src`), so worktree-based product work is not hidden. The lead lists product areas (`kind=product`) by edit count; genuine config (`.claude`/`.beads`/`.git`) and an `` sibling-repo path demote to a footnote (`kind=config`) — still counted, just not the project's identity. **Branch-aware caveat:** when the inferred workflow is branch-and-merge (worktree → PR → merge) and a config bucket out-edits product, the edit count over-weights directly-edited scaffolding on the working branch and under-counts product code that lands via merged PRs — so caveat the signal (the counted edits are the directly-edited branch; product is under-counted here) and do NOT conclude "scaffolding > product." Report it separately from the decision frontier (where you stop). - **Work modes, mode-keyed offers.** `mode-classification` labels each track manual / exploration / knowledge-work / unlabeled. 
Manual tracks (the issue→worktree→PR loop — repetitive but substantive, not trivial) get the gate-and-drive offer; exploration tracks (creative/content/design steering) get the iterate/steer + book-keeping offer (track the parallel threads + their states) — the involvement IS the point, so NO automation pitch for them and do NOT lead with gates; knowledge-work tracks (the recurring intake→process→commentary loop, where the processing is already skill-shaped) get a per-entity tracker that AUTO-RUNS the user's existing processing/commentary skills and stops only at the judgment calls — NOT passive book-keeping and NOT an automation disclaimer; an unlabeled track gets generic book-keeping, never a guessed automation pitch. The word the report renders is `manual`, not `mechanical` (reserve "mechanical" for genuinely trivial edits). +- **Name the SPECIFIC TYPE of knowledge work.** `knowledge-work` is the underlying archetype CLASS (the `mode-classification` value); it must NEVER stand alone in the user-facing report. Synthesize the specific type(s) — from the workstream cluster names + their work-by-area + the user's checked-in skills — and use that characterization in `HOW YOU WORK` (e.g. "People 1-1s & team assessment, positioning, interviews, and strategy mapping — a knowledge-work loop run mostly autonomously") and in the WORKSTREAMS mode column (e.g. `knowledge-work · 1-1s & team assessment`). The offer already names specifics (1-1s, interviews, strategy passes); extend that same specificity to the report BODY. Do NOT add a sub-type classifier — the type is synthesized from the workstream/area signal the survey already has. - **Fill every slot, never invent.** Every `{slot}` in the report and the comparison comes from the step-2 numbers; a literal `{slot}` shown to the user is a bug. If a section's signal is empty (no OPEN decisions, no interruptions, no edits), say the run found none — never dress an empty section up as "no decisions." 
- **Claude body + a Codex body section.** The Claude body (workflow, workstreams, decisions, work-by-area, scaffold) is built from Claude history. Codex is surfaced too, as its own section (step 4) from the `codex-scoped` set: the workdir-attributed count + the workstream clusters + the activity tally (Gemini and per-file Codex work-by-area remain deferred follow-ups). A repo whose ONLY history is Codex still reports "no agent history" at the `scoping=0` stop (the Claude body has nothing); the Codex section renders alongside a non-empty Claude body, not in place of it.