diff --git a/CLAUDE.md b/CLAUDE.md index a321d21..6295a79 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1062,3 +1062,12 @@ are usually marketing / human-PR docs, not agent guidance. `complete` and emits a single `Finish` chunk. - **Tool naming collisions** are silent — if you register two tools with the same `name()`, the second wins. Prefer unique, namespaced names (`fs.read`, `http.fetch`). +- **Wire-shape types are codegen'd to TypeScript.** Rust types crossing the SPA + boundary (REST replies, WS frames) use `#[derive(ts_rs::TS)]` so the frontend + imports a generated `.ts` instead of hand-maintaining a duplicate. Annotations + live on the type in its owning domain crate (`harness-channel`, + `harness-project`, `harness-observability` — never `harness-core`). + Regenerate with `make ts-codegen` after changing an annotated type; the + output under `apps/jarvis-web/src/types/generated/` is committed to git so + the SPA-only build doesn't need a Rust toolchain. See + `docs/conventions/rust-ts-codegen.md`. 
diff --git a/Cargo.lock b/Cargo.lock index 9665f29..9a35933 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2003,12 +2003,13 @@ dependencies = [ [[package]] name = "harness-channel" -version = "0.1.0" +version = "0.2.0" dependencies = [ "async-trait", "chrono", "serde", "serde_json", + "ts-rs", "uuid", ] @@ -2100,7 +2101,7 @@ dependencies = [ [[package]] name = "harness-observability" -version = "0.1.0" +version = "0.2.0" dependencies = [ "async-trait", "serde", @@ -2126,7 +2127,7 @@ dependencies = [ [[package]] name = "harness-project" -version = "0.1.0" +version = "0.2.0" dependencies = [ "async-trait", "chrono", @@ -2134,6 +2135,7 @@ dependencies = [ "serde", "serde_json", "tokio", + "ts-rs", "uuid", ] @@ -2252,6 +2254,7 @@ name = "harness-tools" version = "0.2.0" dependencies = [ "async-trait", + "blake3", "chrono", "diffy", "harness-channel", @@ -5688,6 +5691,15 @@ dependencies = [ "utf-8", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "thiserror" version = "1.0.69" @@ -6253,6 +6265,29 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "ts-rs" +version = "10.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e640d9b0964e9d39df633548591090ab92f7a4567bc31d3891af23471a3365c6" +dependencies = [ + "lazy_static", + "thiserror 2.0.18", + "ts-rs-macros", +] + +[[package]] +name = "ts-rs-macros" +version = "10.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e9d8656589772eeec2cf7a8264d9cda40fb28b9bc53118ceb9e8c07f8f38730" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "termcolor", +] + [[package]] name = "tungstenite" version = "0.24.0" diff --git 
a/Cargo.toml b/Cargo.toml index 2e75af5..ff73742 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -86,6 +86,13 @@ aes = "0.8" cbc = "0.1" rand = "0.8" base64 = "0.22" +# Rust → TypeScript codegen for wire-shape types shared with the +# web frontend. `cargo test --workspace` runs the embedded `export` +# tests `ts-rs` injects for each `#[derive(TS)]` type and writes a +# `.ts` file under `apps/jarvis-web/src/types/generated/`. The +# frontend imports from there instead of hand-maintaining +# duplicates. See `docs/conventions/rust-ts-codegen.md`. +ts-rs = { version = "10", features = ["serde-compat", "no-serde-warnings"] } open = "5" tiktoken-rs = "0.6" libc = "0.2" diff --git a/Makefile b/Makefile index 55c3ee1..b9476a9 100644 --- a/Makefile +++ b/Makefile @@ -84,6 +84,24 @@ test: ## Run the workspace test suite .PHONY: check check: lint test ## Run clippy + tests, what CI runs +# --------------------------------------------------------------------------- +# Rust → TypeScript type codegen (see docs/conventions/rust-ts-codegen.md) +# --------------------------------------------------------------------------- +# Every `#[derive(TS)]` type emits its own `.ts` under +# `apps/jarvis-web/src/types/generated/` when the embedded export +# test runs. Crates with annotated types today: harness-channel, +# harness-project. Add more by following the convention doc. +# +# Output goes in git so the SPA-only Vite build doesn't need a +# Rust toolchain. `make ts-codegen` is the canonical "I changed a +# wire type, regenerate" target; CI's `make test` covers it as a +# side effect. 
+.PHONY: ts-codegen +ts-codegen: ## Regenerate TS types from Rust (`#[derive(TS)]`) + $(CARGO) test -p harness-channel -p harness-project --lib --quiet + @printf "\ngenerated:\n" + @ls apps/jarvis-web/src/types/generated/ | sed 's/^/ /' + # --------------------------------------------------------------------------- # Docker / Compose # --------------------------------------------------------------------------- diff --git a/apps/jarvis-cli/src/main.rs b/apps/jarvis-cli/src/main.rs index 282d668..68b3c04 100644 --- a/apps/jarvis-cli/src/main.rs +++ b/apps/jarvis-cli/src/main.rs @@ -73,6 +73,31 @@ pub struct Args { #[arg(long)] pub no_git_read: bool, + /// Enable the agent-maintained `memory.{list,read,write,delete}` + /// tools (M3.1). Off by default — opt in when you want the CLI + /// session to persist notes under `/.jarvis/memory/` + /// and inject MEMORY.md into the system prompt at startup. + /// `memory.write` / `memory.delete` are approval-gated. + #[arg(long)] + pub enable_memory: bool, + + /// Enable the P10 git-sync tools (`memory.sync`, + /// `memory.sync_status`). The memory dir must be a git working + /// tree with a configured remote; the tools wrap + /// `git pull --rebase && git push` so notes propagate between + /// machines / teammates. Off by default — only useful once + /// you've actually set up a remote. + #[arg(long)] + pub enable_memory_sync: bool, + + /// Disable the `enter_plan_mode` tool. The tool is on by default + /// so the model can volunteer to switch into Plan Mode before + /// risky changes (the CLI's `fs.edit` is on by default, so + /// coding-mode criteria are met). Pass `--no-enter-plan-mode` to + /// keep Plan-Mode entry strictly operator-driven. 
+ #[arg(long, action = clap::ArgAction::SetTrue, default_value_t = false)] + pub no_enter_plan_mode: bool, + /// Pipe mode: read the prompt from `--prompt` (or stdin if /// omitted), run one turn with `AlwaysDeny` so no tool that /// needs a human can fire, print the final assistant text, diff --git a/apps/jarvis-cli/src/runner.rs b/apps/jarvis-cli/src/runner.rs index e8baa0a..7e38e7a 100644 --- a/apps/jarvis-cli/src/runner.rs +++ b/apps/jarvis-cli/src/runner.rs @@ -151,6 +151,20 @@ pub(crate) async fn load_project_prelude(needle: &str) -> Result { )) } +fn resolve_memory_user_root() -> Option { + if let Ok(v) = std::env::var("JARVIS_MEMORY_USER_ROOT") { + let trimmed = v.trim(); + if trimmed.is_empty() { + return None; + } + return Some(std::path::PathBuf::from(trimmed)); + } + std::env::var_os("HOME") + .map(std::path::PathBuf::from) + .or_else(|| std::env::var_os("USERPROFILE").map(std::path::PathBuf::from)) + .map(|h| h.join(".jarvis")) +} + fn build_tools(args: &Args, workspace: &Path) -> ToolRegistry { let cfg = BuiltinsConfig { fs_root: workspace.to_path_buf(), @@ -165,6 +179,21 @@ fn build_tools(args: &Args, workspace: &Path) -> ToolRegistry { enable_fs_write: args.allow_fs_write, enable_shell_exec: args.allow_shell, enable_git_read: !args.no_git_read, + // CLI defaults: enter_plan_mode on (coding REPL benefits from + // the model being able to volunteer a plan-first pass); + // memory tools off until opted in. + enable_enter_plan_mode: !args.no_enter_plan_mode, + enable_memory: args.enable_memory, + // P9: user-scope memory follows the operator across + // workspaces. Default to `~/.jarvis` so the same notes + // are visible from any CLI invocation; `JARVIS_MEMORY_USER_ROOT` + // overrides (e.g. point at a Dropbox path) and an empty + // value disables. No-op when `enable_memory == false`. + memory_user_root: resolve_memory_user_root(), + // P10: git-as-transport sync. 
No-op when `enable_memory` + // is false (the underlying tree only exists when memory + // tools are registered). + enable_memory_sync: args.enable_memory_sync, ..Default::default() }; let mut tools = ToolRegistry::new(); @@ -615,6 +644,17 @@ async fn run_one_turn( event.reason, ); } + AgentEvent::ModeChanged { mode } => { + // CLI mirrors the WS handler: surface the + // mode change inline so the operator sees + // why the next turn behaves differently. + if delta_open { println!(); delta_open = false; } + eprintln!( + "{} permission mode → {:?}", + yellow("⇄"), + mode, + ); + } AgentEvent::Error { message } => { if delta_open { println!(); } return TurnOutcome::Error(message); diff --git a/apps/jarvis-web/src/components/AppChatPane.tsx b/apps/jarvis-web/src/components/AppChatPane.tsx index f201130..94ab8f7 100644 --- a/apps/jarvis-web/src/components/AppChatPane.tsx +++ b/apps/jarvis-web/src/components/AppChatPane.tsx @@ -12,6 +12,7 @@ import { AskTextCard } from "./Chat/AskTextCard"; import { ApprovalCard } from "./Approvals/ApprovalCard"; import { BypassBanner } from "./Approvals/BypassBanner"; import { ModeBadge } from "./Approvals/ModeBadge"; +import { ModeChangedToast } from "./Approvals/ModeChangedToast"; import { PlanModeBanner } from "./Approvals/PlanModeBanner"; import { PlanProposedCard } from "./Approvals/PlanProposedCard"; import { ModelMenu } from "./ModelMenu/ModelMenu"; @@ -19,6 +20,7 @@ import { UsageBadge } from "./UsageBadge"; import { ComposerShoulder } from "./ComposerShoulder"; import { ComposerProjectRail } from "./Composer/ComposerProjectRail"; import { OpenSidebarButton, WorkspacePanelMenu } from "./Workspace/WorkspaceToggles"; +import { BackgroundTasksButton } from "./BackgroundTasks/BackgroundTasksButton"; import { pickedRouting } from "../services/socket"; import { slashCommands } from "../services/slash_commands"; import { useAppStore } from "../store/appStore"; @@ -48,6 +50,7 @@ export function AppChatPane() {
+
@@ -56,6 +59,7 @@ export function AppChatPane() { + diff --git a/apps/jarvis-web/src/components/Approvals/ModeChangedToast.test.tsx b/apps/jarvis-web/src/components/Approvals/ModeChangedToast.test.tsx new file mode 100644 index 0000000..f23cb19 --- /dev/null +++ b/apps/jarvis-web/src/components/Approvals/ModeChangedToast.test.tsx @@ -0,0 +1,66 @@ +// Renders the M2.3 toast when the server reports a non-user +// mode change. Operator-initiated changes (via:"user" / absent) +// stay silent — verified separately so a future regression that +// pops a toast on every click is caught. + +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { act, render, screen } from "@testing-library/react"; +import { useAppStore } from "../../store/appStore"; +import { ModeChangedToast } from "./ModeChangedToast"; + +beforeEach(() => { + vi.useFakeTimers(); + useAppStore.getState().setRecentModeChange(null); +}); +afterEach(() => { + vi.useRealTimers(); +}); + +describe("ModeChangedToast", () => { + it("renders when the change came from the agent (via:tool)", () => { + act(() => { + useAppStore.getState().setRecentModeChange({ + mode: "plan", + via: "tool", + at: Date.now(), + }); + }); + render(); + expect(screen.getByRole("status")).toHaveTextContent( + /Agent.*switched permission mode to.*plan.*read-only/i, + ); + }); + + it("stays silent for operator-initiated changes (via:user)", () => { + act(() => { + useAppStore.getState().setRecentModeChange({ + mode: "auto", + via: "user", + at: Date.now(), + }); + }); + const { container } = render(); + expect(container).toBeEmptyDOMElement(); + }); + + it("renders nothing when no recent change is recorded", () => { + const { container } = render(); + expect(container).toBeEmptyDOMElement(); + }); + + it("can be dismissed via the × button", () => { + act(() => { + useAppStore.getState().setRecentModeChange({ + mode: "plan", + via: "tool", + at: Date.now(), + }); + }); + render(); + const close = 
screen.getByLabelText("Dismiss"); + act(() => { + close.click(); + }); + expect(useAppStore.getState().recentModeChange).toBeNull(); + }); +}); diff --git a/apps/jarvis-web/src/components/Approvals/ModeChangedToast.tsx b/apps/jarvis-web/src/components/Approvals/ModeChangedToast.tsx new file mode 100644 index 0000000..6587fa6 --- /dev/null +++ b/apps/jarvis-web/src/components/Approvals/ModeChangedToast.tsx @@ -0,0 +1,72 @@ +// M2.3 UX: transient toast that surfaces an out-of-band +// permission-mode change. The mode-badge in the header updates +// silently, which is fine when the operator clicked it +// themselves — but when the agent self-switched via +// `enter_plan_mode`, the user needs a visible cue or they'll +// wonder why the next turn behaves differently. +// +// The store action only fires this when the `via` field is +// present (server-emitted). Operator-initiated changes through +// the same handler don't include `via` (or pass `via:"user"`), +// so this stays dormant for the common case. +// +// Auto-clears after `AUTO_CLEAR_MS`. Sticky-style: a brand-new +// change in the same window resets the timer and re-shows. + +import { useEffect, useState } from "react"; +import { useAppStore } from "../../store/appStore"; + +const AUTO_CLEAR_MS = 6000; + +export function ModeChangedToast() { + const recent = useAppStore((s) => s.recentModeChange); + const clear = useAppStore((s) => s.setRecentModeChange); + const [hiddenAt, setHiddenAt] = useState(null); + + useEffect(() => { + if (!recent) return; + // Reset any prior hide-debounce when a fresh change arrives. + setHiddenAt(null); + const id = window.setTimeout(() => { + setHiddenAt(Date.now()); + clear(null); + }, AUTO_CLEAR_MS); + return () => window.clearTimeout(id); + }, [recent, clear]); + + // Operator-initiated changes (via:"user" or no via) are silent — + // the mode badge already reflects the click, no need to toast. 
+ if (!recent) return null; + if (recent.via === "user") return null; + if (hiddenAt !== null && hiddenAt > recent.at) return null; + + return ( +
+ + + {describe(recent.via)} switched permission mode to{" "} + {recent.mode} + {recent.mode === "plan" ? " — next turn will be read-only." : ""} + + +
+ ); +} + +function describe(via: string): string { + switch (via) { + case "tool": + return "Agent"; + case "plan_accepted": + return "Plan accept"; + default: + return "Mode change"; + } +} diff --git a/apps/jarvis-web/src/components/BackgroundTasks/BackgroundTasksButton.tsx b/apps/jarvis-web/src/components/BackgroundTasks/BackgroundTasksButton.tsx new file mode 100644 index 0000000..c8fba68 --- /dev/null +++ b/apps/jarvis-web/src/components/BackgroundTasks/BackgroundTasksButton.tsx @@ -0,0 +1,39 @@ +// Header trigger for `<BackgroundTasksPanel>`. Lives in +// `<AppChatPane>`'s header-actions slot. +// The panel itself is portal-free and renders right under the button +// — the visible chrome is just an icon + label; click toggles open. + +import { useState } from "react"; +import { BackgroundTasksPanel } from "./BackgroundTasksPanel"; + +export function BackgroundTasksButton() { + const [open, setOpen] = useState(false); + return ( +
+ + setOpen(false)} /> +
+ ); +} diff --git a/apps/jarvis-web/src/components/BackgroundTasks/BackgroundTasksPanel.tsx b/apps/jarvis-web/src/components/BackgroundTasks/BackgroundTasksPanel.tsx new file mode 100644 index 0000000..0c950b1 --- /dev/null +++ b/apps/jarvis-web/src/components/BackgroundTasks/BackgroundTasksPanel.tsx @@ -0,0 +1,165 @@ +// Background-tasks panel — a single "what's in flight right now" +// view that aggregates chat turns, subagent runs, and (over time) +// auto-mode picks / MCP / shell jobs into one list. Backed by the +// `GET /v1/tasks` aggregator; polled while open and immediately +// stopped when the panel closes so a quiet system doesn't pay for +// idle fetches. + +import { useEffect, useRef, useState } from "react"; +import { apiUrl } from "../../services/api"; +import { useAppStore } from "../../store/appStore"; + +// Safety-net poll: the server pushes `tasks_snapshot` frames at +// every turn boundary (P7), so under normal use the panel gets +// real-time updates and the poll just covers "first open" and +// "WS hiccup" gaps. 15s is gentle on the backend while still +// catching missed pushes within a typical usage window. +const POLL_INTERVAL_MS = 15000; + +type TaskKind = "chat_run" | "subagent_run" | "requirement_run" | "mcp_server"; + +interface TaskEntry { + kind: TaskKind; + id: string; + label: string; + status: string; + started_at: number; + updated_at: number; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + detail: any; +} + +interface TasksResponse { + items: TaskEntry[]; + generated_at: number; +} + +interface Props { + open: boolean; + onClose: () => void; +} + +export function BackgroundTasksPanel({ open, onClose }: Props) { + // Pushed snapshot from the WS `tasks_snapshot` frame; preferred + // over the panel's own poll when present so the panel reflects + // server state in near-real-time. 
+ const pushed = useAppStore((s) => s.backgroundTasksSnapshot); + const [polled, setPolled] = useState(null); + const [error, setError] = useState(null); + const [loading, setLoading] = useState(false); + const timerRef = useRef(null); + const tasks: TaskEntry[] = + (pushed as TaskEntry[] | null) ?? polled ?? []; + + useEffect(() => { + if (!open) { + if (timerRef.current != null) { + window.clearInterval(timerRef.current); + timerRef.current = null; + } + return; + } + let cancelled = false; + async function fetchOnce() { + try { + setLoading(true); + const r = await fetch(apiUrl("/v1/tasks")); + if (!r.ok) { + if (!cancelled) setError(`HTTP ${r.status}`); + return; + } + const body = (await r.json()) as TasksResponse; + if (!cancelled) { + setPolled(body.items); + setError(null); + } + } catch (e) { + if (!cancelled) setError(String(e)); + } finally { + if (!cancelled) setLoading(false); + } + } + void fetchOnce(); + timerRef.current = window.setInterval(() => { + void fetchOnce(); + }, POLL_INTERVAL_MS); + return () => { + cancelled = true; + if (timerRef.current != null) { + window.clearInterval(timerRef.current); + timerRef.current = null; + } + }; + }, [open]); + + if (!open) return null; + + return ( +
+
+

Background tasks

+ +
+
+ {error && ( +
+ Failed to load: {error} +
+ )} + {!error && tasks.length === 0 && !loading && ( +
No active work right now.
+ )} + {tasks.length > 0 && ( +
    + {tasks.map((t) => ( +
  • + {kindLabel(t.kind)} + {t.label} + {t.status} + {relativeAge(t.started_at)} +
  • + ))} +
+ )} +
+
+ ); +} + +function kindLabel(k: TaskKind): string { + switch (k) { + case "chat_run": + return "Chat"; + case "subagent_run": + return "SubAgent"; + case "requirement_run": + return "Requirement"; + case "mcp_server": + return "MCP"; + default: + return k; + } +} + +function relativeAge(startedAt: number): string { + const dt = Math.max(0, Date.now() - startedAt); + if (dt < 1000) return "just now"; + const sec = Math.floor(dt / 1000); + if (sec < 60) return `${sec}s`; + const min = Math.floor(sec / 60); + if (min < 60) return `${min}m`; + const hr = Math.floor(min / 60); + return `${hr}h`; +} diff --git a/apps/jarvis-web/src/components/Chat/CollapsedToolGroup.tsx b/apps/jarvis-web/src/components/Chat/CollapsedToolGroup.tsx new file mode 100644 index 0000000..f990b81 --- /dev/null +++ b/apps/jarvis-web/src/components/Chat/CollapsedToolGroup.tsx @@ -0,0 +1,116 @@ +// Folded card for a run of consecutive assistant iterations whose +// tool calls are all read-only (`fs.read`, `code.grep`, `git.*`, +// `workspace.context`, etc.) and whose visible content is empty. +// +// Without this fold, a long investigation loop (read → grep → read +// → grep → read → ... before finally writing a patch) bloats the +// transcript with 5–10 near-identical "Read 1 file" rows the user +// has to skim past. The fold collapses them into one summary like +// "Read 6 files, ran 3 greps (across 9 steps) ▸" with a click-to- +// expand that re-shows the original `AssistantBubble`s inline. +// +// The fold rule is enforced upstream in `<MessageList>`; this +// component just renders whatever it's handed. Folding is +// strictly opt-in by the upstream classifier — anything that +// touched a write/exec/mutating tool stays as its own bubble. 
+ +import { useMemo, useState } from "react"; +import { useAppStore } from "../../store/appStore"; +import { AssistantBubble } from "./AssistantBubble"; +import { + aggregateStepStatus, + describeStep, +} from "./toolStepSummary"; +import { t } from "../../utils/i18n"; +import type { UiMessage } from "../../store/types"; + +interface Props { + /// Consecutive assistant `UiMessage`s being folded. Must each + /// carry only read-only tool calls and no visible content — + /// the classifier in `<MessageList>` is the source of truth for + /// that invariant. + messages: Array>; +} + +export function CollapsedToolGroup({ messages }: Props) { + // Same Zustand discipline as ToolStepRow: subscribe to the raw + // map and derive the flat block list via useMemo so the selector + // doesn't churn on every store snapshot. + const allBlocks = useAppStore((s) => s.toolBlocks); + const blocks = useMemo(() => { + const flat = []; + for (const m of messages) { + for (const id of m.toolCallIds) { + const b = allBlocks[id]; + if (b) flat.push(b); + } + } + return flat; + }, [messages, allBlocks]); + + const [manualOpen, setManualOpen] = useState(null); + const status = aggregateStepStatus(blocks); + // Auto-expand while anything in the run is still working so the + // user sees live progress, mirroring `ToolStepRow`'s pattern. + const defaultOpen = status === "running"; + const open = manualOpen ?? defaultOpen; + + if (blocks.length === 0) return null; + const summary = describeStep(blocks); + const badge = + status === "ok" || status === "empty" + ? null + : (({ + running: t("running"), + denied: t("denied", ""), + error: t("error"), + } as Record)[status] || status); + + return ( +
+ + {open ? ( +
+ {messages.map((m, idx) => ( + 0} + /> + ))} +
+ ) : null} +
+ ); +} diff --git a/apps/jarvis-web/src/components/Chat/MessageList.test.ts b/apps/jarvis-web/src/components/Chat/MessageList.test.ts new file mode 100644 index 0000000..0756ad5 --- /dev/null +++ b/apps/jarvis-web/src/components/Chat/MessageList.test.ts @@ -0,0 +1,157 @@ +// Tests for the transcript folding classifier + grouper used by +// `<MessageList>`. These are pure data-shape tests — no React +// involved — so they stay fast and stable across UI tweaks. + +import { describe, expect, it } from "vitest"; +import { groupForFolding, isFoldable } from "./MessageList"; +import type { ToolBlockEntry, UiMessage } from "../../store/types"; + +function block(over: Partial): ToolBlockEntry { + return { + id: over.id ?? "t", + name: over.name ?? "fs.read", + args: over.args ?? {}, + status: over.status ?? "ok", + output: over.output ?? null, + progress: over.progress ?? "", + decisionSource: null, + startedAt: 0, + finishedAt: 0, + }; +} + +function assistant(over: { + uid: string; + toolCallIds: string[]; + content?: string; + reasoning?: string; + finalised?: boolean; +}): UiMessage { + return { + uid: over.uid, + kind: "assistant", + content: over.content ?? "", + reasoning: over.reasoning ?? "", + toolCallIds: over.toolCallIds, + finalised: over.finalised ?? true, + }; +} + +function user(uid: string, content = "hi", userOrdinal = 1): UiMessage { + return { uid, kind: "user", content, userOrdinal }; +} + +describe("isFoldable", () => { + it("folds an assistant with one read-only tool call and no content", () => { + const m = assistant({ uid: "a", toolCallIds: ["t1"] }); + const blocks = { t1: block({ id: "t1", name: "fs.read" }) }; + expect(isFoldable(m, blocks)).toBe(true); + }); + + it("rejects when the assistant has visible content", () => { + const m = assistant({ uid: "a", toolCallIds: ["t1"], content: "thinking..." 
}); + const blocks = { t1: block({ id: "t1", name: "fs.read" }) }; + expect(isFoldable(m, blocks)).toBe(false); + }); + + it("rejects when any tool call is a write/exec tool", () => { + const m = assistant({ uid: "a", toolCallIds: ["t1", "t2"] }); + const blocks = { + t1: block({ id: "t1", name: "fs.read" }), + t2: block({ id: "t2", name: "fs.edit" }), + }; + expect(isFoldable(m, blocks)).toBe(false); + }); + + it("rejects when a referenced tool block is missing", () => { + const m = assistant({ uid: "a", toolCallIds: ["t1"] }); + const blocks = {}; + expect(isFoldable(m, blocks)).toBe(false); + }); + + it("rejects an assistant with no tool calls", () => { + const m = assistant({ uid: "a", toolCallIds: [] }); + expect(isFoldable(m, {})).toBe(false); + }); + + it("rejects an unknown tool name (don't aggressively fold unknowns)", () => { + const m = assistant({ uid: "a", toolCallIds: ["t1"] }); + const blocks = { t1: block({ id: "t1", name: "totally.new.mcp.tool" }) }; + expect(isFoldable(m, blocks)).toBe(false); + }); +}); + +describe("groupForFolding", () => { + it("folds 3 consecutive read-only iterations into one group", () => { + const msgs: UiMessage[] = [ + user("u1"), + assistant({ uid: "a1", toolCallIds: ["t1"] }), + assistant({ uid: "a2", toolCallIds: ["t2"] }), + assistant({ uid: "a3", toolCallIds: ["t3"] }), + assistant({ uid: "a4", toolCallIds: ["t4"] }), // the final reply + ]; + const blocks = { + t1: block({ id: "t1", name: "fs.read" }), + t2: block({ id: "t2", name: "code.grep" }), + t3: block({ id: "t3", name: "git.status" }), + t4: block({ id: "t4", name: "fs.edit" }), // not foldable → standalone + }; + const groups = groupForFolding(msgs, blocks); + expect(groups.map((g) => g.kind)).toEqual(["single", "folded", "single"]); + if (groups[1].kind === "folded") { + expect(groups[1].messages.map((m) => m.uid)).toEqual(["a1", "a2", "a3"]); + } + }); + + it("leaves a sub-threshold run inline", () => { + const msgs: UiMessage[] = [ + user("u1"), + 
assistant({ uid: "a1", toolCallIds: ["t1"] }), + assistant({ uid: "a2", toolCallIds: ["t2"] }), // only 2 read-only — below MIN_GROUP_SIZE + assistant({ uid: "a3", toolCallIds: ["t3"] }), + ]; + const blocks = { + t1: block({ id: "t1", name: "fs.read" }), + t2: block({ id: "t2", name: "fs.read" }), + t3: block({ id: "t3", name: "fs.edit" }), + }; + const groups = groupForFolding(msgs, blocks); + expect(groups.map((g) => g.kind)).toEqual([ + "single", + "single", + "single", + "single", + ]); + }); + + it("does not fold across a non-foldable interruption", () => { + const msgs: UiMessage[] = [ + assistant({ uid: "a1", toolCallIds: ["t1"] }), + assistant({ uid: "a2", toolCallIds: ["t2"] }), + assistant({ uid: "a3", toolCallIds: ["t3"], content: "I think we should..." }), // breaks the run + assistant({ uid: "a4", toolCallIds: ["t4"] }), + assistant({ uid: "a5", toolCallIds: ["t5"] }), + assistant({ uid: "a6", toolCallIds: ["t6"] }), + ]; + const blocks = { + t1: block({ id: "t1", name: "fs.read" }), + t2: block({ id: "t2", name: "fs.read" }), + t3: block({ id: "t3", name: "fs.read" }), + t4: block({ id: "t4", name: "fs.read" }), + t5: block({ id: "t5", name: "fs.read" }), + t6: block({ id: "t6", name: "fs.read" }), + }; + const groups = groupForFolding(msgs, blocks); + // a1+a2 are sub-threshold (only 2) → inline. a3 standalone. + // a4+a5+a6 hit threshold → folded. 
+ expect(groups.map((g) => g.kind)).toEqual([ + "single", + "single", + "single", + "folded", + ]); + if (groups[3].kind === "folded") { + expect(groups[3].messages.map((m) => m.uid)).toEqual(["a4", "a5", "a6"]); + } + }); +}); diff --git a/apps/jarvis-web/src/components/Chat/MessageList.tsx b/apps/jarvis-web/src/components/Chat/MessageList.tsx index 9539d58..6d0e0aa 100644 --- a/apps/jarvis-web/src/components/Chat/MessageList.tsx +++ b/apps/jarvis-web/src/components/Chat/MessageList.tsx @@ -3,7 +3,17 @@ // scroll-to-bottom strategy lives in `useStickToBottom` — see that // file's header for why a naive "scroll on every render" effect // doesn't work with the async XMarkdown subtree. +// +// MessageList is also the "view transformer" layer: before +// rendering, it groups consecutive assistant iterations whose tool +// calls are *all* read-only and whose visible content is empty +// into a single `<CollapsedToolGroup>` card. This keeps a long +// investigation loop (read → grep → read → grep → … before the +// real edit) from drowning the transcript. The fold threshold is +// `MIN_GROUP_SIZE` — small enough to be useful, large enough that +// brief lookups don't get hidden behind an extra click. 
+import { useMemo } from "react"; import { useAppStore } from "../../store/appStore"; import { useStickToBottom } from "../../hooks/useStickToBottom"; import { UserBubble } from "./UserBubble"; @@ -11,20 +21,100 @@ import { AssistantBubble } from "./AssistantBubble"; import { AgentLoadingFooter } from "./AgentLoadingFooter"; import { WelcomeScreen } from "./WelcomeScreen"; import { EmptyConvoHint } from "./EmptyConvoHint"; +import { CollapsedToolGroup } from "./CollapsedToolGroup"; import { MarkdownView } from "./MarkdownView"; +import { isReadOnlyTool } from "./toolStepSummary"; import { t } from "../../utils/i18n"; +import type { UiMessage, ToolBlockEntry } from "../../store/types"; + +const MIN_GROUP_SIZE = 3; + +type AssistantMsg = Extract; + +type Group = + | { kind: "single"; message: UiMessage } + | { kind: "folded"; messages: AssistantMsg[] }; + +/// True when this assistant message qualifies for folding: it has +/// at least one tool call, every tool call is a known read-only +/// tool, and its visible body content is empty (whitespace-only is +/// treated as empty). Reasoning is allowed — it lives inside a +/// collapsed disclosure either way. +/// +/// Exported for testing — see [`groupForFolding`]. +export function isFoldable( + m: UiMessage, + toolBlocks: Record, +): m is AssistantMsg { + if (m.kind !== "assistant") return false; + if (m.toolCallIds.length === 0) return false; + if (m.content.trim().length > 0) return false; + for (const id of m.toolCallIds) { + const b = toolBlocks[id]; + // Missing block = can't classify safely → don't fold. + if (!b) return false; + if (!isReadOnlyTool(b.name)) return false; + } + return true; +} + +/// Walk `messages` once, batching runs of foldable assistant +/// iterations of length >= MIN_GROUP_SIZE into a `folded` group; +/// everything else stays as its own `single` entry. Runs shorter +/// than the threshold pass through unchanged so brief reads still +/// render inline. 
+/// +/// Exported for testing — the rest of the SPA should never need to +/// call this directly. The classifier `isFoldable` is exposed +/// alongside for the same reason. +export function groupForFolding( + messages: UiMessage[], + toolBlocks: Record, +): Group[] { + const out: Group[] = []; + let buf: AssistantMsg[] = []; + const flushBuf = () => { + if (buf.length >= MIN_GROUP_SIZE) { + out.push({ kind: "folded", messages: buf }); + } else { + for (const m of buf) out.push({ kind: "single", message: m }); + } + buf = []; + }; + for (const m of messages) { + if (isFoldable(m, toolBlocks)) { + buf.push(m); + } else { + flushBuf(); + out.push({ kind: "single", message: m }); + } + } + flushBuf(); + return out; +} export function MessageList() { const messages = useAppStore((s) => s.messages); + const toolBlocks = useAppStore((s) => s.toolBlocks); const activeId = useAppStore((s) => s.activeId); const emptyHint = useAppStore((s) => s.emptyHintIdShort); const { ref } = useStickToBottom({ activeId }); + const groups = useMemo( + () => groupForFolding(messages, toolBlocks), + [messages, toolBlocks], + ); + return (
{messages.length === 0 && !emptyHint && } {messages.length === 0 && emptyHint && } - {messages.map((m, i) => { + {groups.map((g, gi) => { + if (g.kind === "folded") { + const head = g.messages[0]; + return ; + } + const m = g.message; if (m.kind === "user") { return ( = new Set([ + "fs.read", + "fs.list", + "code.grep", + "grep", + "git.status", + "git.diff", + "git.log", + "git.show", + "workspace.context", + "project.checks", + "time.now", + "http.fetch", + "echo", + "doc.list", + "doc.search", + "doc.get", + "doc.draft.get", +]); + +/// True when the tool is safe to fold under the transcript +/// "read-only run" rule. Unknown / new tools default to false — +/// folding aggressively for unrecognised tools could hide a +/// dangerous mutation behind a "Read 5 files" summary. +export function isReadOnlyTool(name: string): boolean { + return READ_ONLY_TOOL_NAMES.has(name); +} + /// Aggregate status for the whole step. Drives the row's badge: /// • any running → "running" /// • any error → "error" diff --git a/apps/jarvis-web/src/components/Composer/AutoActivatedSkillsChip.test.tsx b/apps/jarvis-web/src/components/Composer/AutoActivatedSkillsChip.test.tsx new file mode 100644 index 0000000..074d9cd --- /dev/null +++ b/apps/jarvis-web/src/components/Composer/AutoActivatedSkillsChip.test.tsx @@ -0,0 +1,50 @@ +// Composer chip surfaces server-predicted next-turn skill +// auto-activations. Renders nothing when the list is empty so a +// quiet session doesn't pay for chrome. 
+ +import { beforeEach, describe, expect, it } from "vitest"; +import { act, render, screen } from "@testing-library/react"; +import { useAppStore } from "../../store/appStore"; +import { AutoActivatedSkillsChip } from "./AutoActivatedSkillsChip"; + +beforeEach(() => { + useAppStore.getState().setAutoActivatedNextTurnSkills([]); +}); + +describe("AutoActivatedSkillsChip", () => { + it("renders nothing on an empty list", () => { + const { container } = render(); + expect(container).toBeEmptyDOMElement(); + }); + + it("renders one pill per skill name", () => { + act(() => { + useAppStore.getState().setAutoActivatedNextTurnSkills([ + "rs-helper", + "tsx-helper", + ]); + }); + render(); + expect(screen.getByRole("status")).toHaveTextContent( + /Auto-activated for next turn/i, + ); + expect(screen.getByText("rs-helper")).toBeInTheDocument(); + expect(screen.getByText("tsx-helper")).toBeInTheDocument(); + }); + + it("dedupes and skips blanks via store action", () => { + act(() => { + useAppStore.getState().setAutoActivatedNextTurnSkills([ + "rs-helper", + "rs-helper", + "", + " ", + "tsx-helper", + ]); + }); + render(); + // Each pill text appears once. + expect(screen.getAllByText("rs-helper")).toHaveLength(1); + expect(screen.getAllByText("tsx-helper")).toHaveLength(1); + }); +}); diff --git a/apps/jarvis-web/src/components/Composer/AutoActivatedSkillsChip.tsx b/apps/jarvis-web/src/components/Composer/AutoActivatedSkillsChip.tsx new file mode 100644 index 0000000..f560d42 --- /dev/null +++ b/apps/jarvis-web/src/components/Composer/AutoActivatedSkillsChip.tsx @@ -0,0 +1,33 @@ +// M3.3 UX: tiny notice above the composer telling the user which +// skills *will* auto-activate on the next user turn given the +// files the agent touched in the previous turn. Backed by the +// `skill_auto_activated_for_next_turn` WS frame; falls back to +// rendering nothing when no skills match — no chrome cost on +// quiet sessions. 
+// +// Self-contained on purpose: the chip subscribes to its single +// store field and renders inline; no portal, no popover. Sits +// between the form opening tag and the input-wrapper in Composer +// so it visually attaches to the input the user is about to type +// into. + +import { useAppStore } from "../../store/appStore"; + +export function AutoActivatedSkillsChip() { + const skills = useAppStore((s) => s.autoActivatedNextTurnSkills); + if (!skills || skills.length === 0) return null; + return ( +
+ + Auto-activated for next turn: + +
    + {skills.map((name) => ( +
  • + {name} +
  • + ))} +
+
+ ); +} diff --git a/apps/jarvis-web/src/components/Composer/Composer.tsx b/apps/jarvis-web/src/components/Composer/Composer.tsx index 18817bc..11e7a9f 100644 --- a/apps/jarvis-web/src/components/Composer/Composer.tsx +++ b/apps/jarvis-web/src/components/Composer/Composer.tsx @@ -8,6 +8,7 @@ import { useEffect, useRef, useState } from "react"; import { useAppStore } from "../../store/appStore"; import { t } from "../../utils/i18n"; import { SendButton, StopButton } from "../ComposerButtons"; +import { AutoActivatedSkillsChip } from "./AutoActivatedSkillsChip"; import { SlashPalette, type SlashCommand } from "./SlashPalette"; import { sendFrame, isOpen } from "../../services/socket"; import { startConversationTurn } from "../../services/conversationSockets"; @@ -163,6 +164,7 @@ export function Composer({ slashCommands, pickedRouting, metaChildren }: Props) autoComplete="off" onSubmit={(e) => { e.preventDefault(); submit(); }} > +