diff --git a/.changeset/needs-approval-session-ctx.md b/.changeset/needs-approval-session-ctx.md new file mode 100644 index 000000000..625099eb4 --- /dev/null +++ b/.changeset/needs-approval-session-ctx.md @@ -0,0 +1,5 @@ +--- +"eve": patch +--- + +`needsApproval` now receives `session` alongside `toolName`, `toolInput`, and `approvedTools`. Use `session.auth.current` to skip approval for schedule-triggered runs (`principalId: "eve:app"`) while still prompting when a person triggers the same tool. diff --git a/docs/tools/human-in-the-loop.md b/docs/tools/human-in-the-loop.md index a9b0af5b6..a332461b5 100644 --- a/docs/tools/human-in-the-loop.md +++ b/docs/tools/human-in-the-loop.md @@ -38,7 +38,7 @@ export default defineTool({ By default, omitted `needsApproval` behaves like `never()`, so tool calls may execute without human approval. Require human approval or other safeguards for sensitive, irreversible, regulated, financial, healthcare, employment, housing, legal, safety-impacting, user-impacting, or external side-effecting actions. -When the decision depends on the input, pass your own predicate instead of a helper. It receives `{ toolName, toolInput, approvedTools }` and returns a boolean. `toolInput` can be undefined, so guard the access. To require approval only when an amount crosses a threshold: +When the decision depends on the input, pass your own predicate instead of a helper. It receives `{ toolName, toolInput, approvedTools, session }` and returns a boolean. `toolInput` can be undefined, so guard the access. To require approval only when an amount crosses a threshold: ```ts needsApproval: ({ toolInput }) => (toolInput?.amount ?? 0) > 1000, @@ -46,6 +46,27 @@ needsApproval: ({ toolInput }) => (toolInput?.amount ?? 0) > 1000, Gating a side effect on approval is also how you make non-idempotent work safe across replays: a charge or email that sits behind `always()` can't fire from a re-run step without a fresh human decision. +### Skipping approval for schedules + +`session.auth.current` identifies who triggered the run. Schedule-initiated sessions carry `principalId: "eve:app"` and `principalType: "runtime"`. Use this to skip approval for automated runs while still prompting when a person triggers the same tool: + +```ts title="agent/tools/refund_charge.ts" +import { defineTool } from "eve/tools"; +import { z } from "zod"; + +export default defineTool({ + description: "Refund a charge.", + inputSchema: z.object({ chargeId: z.string(), amount: z.number() }), + needsApproval: ({ session }) => + session.auth.current?.principalId !== "eve:app", + async execute(input) { + return refund(input); + }, +}); +``` + +`session` in `needsApproval` has the same shape as `ctx.session` in `execute`: `id`, `auth`, `turn`, and an optional `parent`. Skipping approval on scheduled runs means any non-idempotent side effect will re-fire if a step replays, so pair this pattern with idempotency keys or `once()` where needed. + ## Questions The built-in `ask_question` tool lets the model pause and ask the user, rather than guessing. It has no `execute` — the model calls it with `{ prompt, options?, allowFreeform? }`: diff --git a/packages/eve/src/context/dynamic-tool-lifecycle.test.ts b/packages/eve/src/context/dynamic-tool-lifecycle.test.ts index d25dfb23a..d486fecbf 100644 --- a/packages/eve/src/context/dynamic-tool-lifecycle.test.ts +++ b/packages/eve/src/context/dynamic-tool-lifecycle.test.ts @@ -6,7 +6,7 @@ import { defineTool } from "#public/definitions/tool.js"; vi.mock("#context/build-callback-context.js", () => ({ buildCallbackContext: () => ({ - session: { id: "test", auth: { current: null, initiator: null }, turn: {} }, + session: { id: "test", auth: { current: null, initiator: null }, turn: { id: "test-turn", sequence: 0 } }, }), })); @@ -880,6 +880,7 @@ describe("framework dynamic tools (no bundler transform)", () => { expect( tools[0]!.needsApproval!({ approvedTools: new Set(), + session: { id: "test", auth: { current: null, initiator: null }, turn: { id: "test-turn", sequence: 0 } }, toolInput: undefined, toolName: "risky", }), @@ -916,12 +917,14 @@ describe("framework dynamic tools (no bundler transform)", () => { expect( tools[0]!.needsApproval!({ approvedTools: new Set(), + session: { id: "test", auth: { current: null, initiator: null }, turn: { id: "test-turn", sequence: 0 } }, toolInput: { draftId: "draft_123" }, toolName: "guarded", }), ).toBe(true); expect(approvalFn).toHaveBeenCalledExactlyOnceWith({ approvedTools: new Set(), + session: { id: "test", auth: { current: null, initiator: null }, turn: { id: "test-turn", sequence: 0 } }, toolInput: { draftId: "draft_123" }, toolName: "guarded", }); diff --git a/packages/eve/src/harness/code-mode.test.ts b/packages/eve/src/harness/code-mode.test.ts index e7df568a4..c14c322d5 100644 --- a/packages/eve/src/harness/code-mode.test.ts +++ b/packages/eve/src/harness/code-mode.test.ts @@ -1,5 +1,5 @@ import { jsonSchema, tool } from "ai"; -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; import { applySandboxToolSet, buildSandboxHostTools } from "#harness/code-mode.js"; import { CODE_MODE_SURFACE, WORKFLOW_SURFACE } from "#harness/sandbox-surface.js"; @@ -11,6 +11,12 @@ import { buildToolSet } from "#harness/tools.js"; import type { HarnessToolMap } from "#harness/types.js"; import { isCodeModeEnvEnabled, resolveCodeModeEnabled } from "#shared/code-mode.js"; +vi.mock("#context/build-callback-context.js", () => ({ + buildCallbackContext: () => ({ + session: { id: "test", auth: { current: null, initiator: null }, turn: { id: "test-turn", sequence: 0 } }, + }), +})); + describe("resolveCodeModeEnabled", () => { it("reads the EVE_EXPERIMENTAL_CODE_MODE backstop", () => { expect(isCodeModeEnvEnabled({ EVE_EXPERIMENTAL_CODE_MODE: "1" })).toBe(true); diff --git a/packages/eve/src/harness/input-requests.test.ts b/packages/eve/src/harness/input-requests.test.ts index d1716a6f9..13823943a 100644 --- a/packages/eve/src/harness/input-requests.test.ts +++ b/packages/eve/src/harness/input-requests.test.ts @@ -1,5 +1,11 @@ import { jsonSchema, type ModelMessage } from "ai"; -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; + +vi.mock("#context/build-callback-context.js", () => ({ + buildCallbackContext: () => ({ + session: { id: "test", auth: { current: null, initiator: null }, turn: { id: "test-turn", sequence: 0 } }, + }), +})); import { once } from "#public/tools/approval/approval-helpers.js"; import type { InputRequest } from "#runtime/input/types.js"; diff --git a/packages/eve/src/harness/tool-loop.test.ts b/packages/eve/src/harness/tool-loop.test.ts index 787f98edd..77475db52 100644 --- a/packages/eve/src/harness/tool-loop.test.ts +++ b/packages/eve/src/harness/tool-loop.test.ts @@ -64,6 +64,12 @@ vi.mock("./instrumentation-config.js", () => ({ getInstrumentationConfig: (...args: unknown[]) => mockGetInstrumentationConfig(...args), })); +vi.mock("#context/build-callback-context.js", () => ({ + buildCallbackContext: () => ({ + session: { id: "test", auth: { current: null, initiator: null }, turn: { id: "test-turn", sequence: 0 } }, + }), +})); + vi.mock("./compaction.js", () => ({ compactMessages: vi.fn(), estimateTokens: vi.fn().mockReturnValue(5000), @@ -620,6 +626,7 @@ describe("createToolLoopHarness", () => { await expect(dynamicTool.needsApproval?.({ line: "victoria" })).resolves.toBe(true); expect(needsApproval).toHaveBeenCalledExactlyOnceWith({ approvedTools: new Set(), + session: { id: "test", auth: { current: null, initiator: null }, turn: { id: "test-turn", sequence: 0 } }, toolInput: { line: "victoria" }, toolName: "tfl__getLineStatus", }); diff --git a/packages/eve/src/harness/tools.test.ts b/packages/eve/src/harness/tools.test.ts index 061e09c69..290b769b3 100644 --- a/packages/eve/src/harness/tools.test.ts +++ b/packages/eve/src/harness/tools.test.ts @@ -1,5 +1,5 @@ import { type JSONSchema7, jsonSchema } from "ai"; -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; import { always, never, once } from "#public/tools/approval/approval-helpers.js"; import type { RuntimeModelReference } from "#runtime/agent/bootstrap.js"; @@ -13,6 +13,21 @@ import type { JsonObject } from "#shared/json.js"; import type { HarnessToolDefinition } from "#harness/execute-tool.js"; import { buildToolSet, buildToolSetWithProviderTools } from "#harness/tools.js"; import type { HarnessToolMap } from "#harness/types.js"; +import type { SessionContext } from "#public/definitions/callback-context.js"; + +// vi.mock is hoisted; define the fixture via vi.hoisted so the factory can +// reference it before module-level code runs. +const { TEST_SESSION } = vi.hoisted(() => ({ + TEST_SESSION: { + id: "test-session", + auth: { current: null, initiator: null }, + turn: { id: "test-turn", sequence: 0 }, + } satisfies SessionContext["session"], +})); + +vi.mock("#context/build-callback-context.js", () => ({ + buildCallbackContext: () => ({ session: TEST_SESSION }), +})); function getJsonSchema(tool: unknown): unknown { return (tool as { inputSchema: { jsonSchema: unknown } }).inputSchema.jsonSchema; @@ -648,6 +663,60 @@ describe("buildToolSet", () => { expect(capturedInput).toEqual(toolInput); }); + it("passes session from the active context into needsApproval", async () => { + let capturedSession: unknown; + const tools: HarnessToolMap = new Map([ + [ + "guarded", + { + description: "A guarded tool.", + execute: async () => "ok", + inputSchema: jsonSchema({}), + name: "guarded", + needsApproval: (ctx) => { + capturedSession = ctx.session; + return false; + }, + }, + ], + ]); + + const result = buildToolSet({ tools }); + const needsApproval = (result.guarded as { needsApproval?: NeedsApprovalFn }).needsApproval; + + await needsApproval?.({}, {}); + + expect(capturedSession).toEqual(TEST_SESSION); + }); + + it("skips approval when session principal matches a schedule runtime", async () => { + // Simulates the schedule principal check mukund described: skip approval + // for runs triggered by the runtime (eve:app) and require it otherwise. + // TEST_SESSION has auth.current === null (non-schedule), so the predicate + // below requires approval for it. A real schedule principal would carry + // principalId: "eve:app" and the predicate would return false. + const tools: HarnessToolMap = new Map([ + [ + "refund", + { + description: "Refund a charge.", + execute: async () => "ok", + inputSchema: jsonSchema({}), + name: "refund", + // Skip approval only for the schedule runtime principal. + needsApproval: ({ session }) => + session.auth.current?.principalId !== "eve:app", + }, + ], + ]); + + const result = buildToolSet({ tools }); + const needsApproval = (result.refund as { needsApproval?: NeedsApprovalFn }).needsApproval; + + // TEST_SESSION has current === null (not a schedule), so approval IS needed. + await expect(needsApproval?.({}, {})).resolves.toBe(true); + }); + it("input-aware approval skips when compound key is in approvedTools", async () => { const tools: HarnessToolMap = new Map([ [ diff --git a/packages/eve/src/harness/tools.ts b/packages/eve/src/harness/tools.ts index e4fca9603..35e74f5de 100644 --- a/packages/eve/src/harness/tools.ts +++ b/packages/eve/src/harness/tools.ts @@ -19,6 +19,7 @@ import { import { stashToolInterrupt } from "#harness/tool-interrupts.js"; import { withToolOutputSerializationError } from "#harness/tool-output-serialization.js"; import { isCodeModeToolExecutionOptions } from "#runtime/framework-tools/code-mode-connection-auth.js"; +import { buildCallbackContext } from "#context/build-callback-context.js"; type ToolModelOutputValue = | { readonly type: "json"; readonly value: JSONValue } @@ -296,9 +297,13 @@ function buildNeedsApprovalFn( if (definition.needsApproval === undefined) return false; const toolInputRecord = isObject(toolInput) ? toolInput : undefined; + // buildCallbackContext reads the ALS scope that is active when the + // approval predicate fires, so session is always current. + const { session } = buildCallbackContext(); return definition.needsApproval({ approvedTools: input.approvedTools ?? new Set(), + session, toolInput: toolInputRecord, toolName: definition.name, }); diff --git a/packages/eve/src/public/definitions/tool.ts b/packages/eve/src/public/definitions/tool.ts index 4806cf715..f052083e2 100644 --- a/packages/eve/src/public/definitions/tool.ts +++ b/packages/eve/src/public/definitions/tool.ts @@ -27,9 +27,13 @@ type ApprovalContextInput = unknown extends TInput ? Record> { readonly approvedTools: ReadonlySet; + readonly session: SessionContext["session"]; readonly toolInput?: ApprovalToolInput; readonly toolName: string; }