Merge remote-tracking branch 'origin/main' into brandon-cli-deploy-work

brandonkachen · brandonkachen · commit d23e975e8fea · 2025-10-22T13:50:13.000-07:00
diff --git a/.agents/base2/base2-with-files-input.ts b/.agents/base2/base2-with-files-input.ts
@@ -0,0 +1,55 @@
+import { SecretAgentDefinition } from 'types/secret-agent-definition'
+import { createBase2 } from './base2'
+
+const definition: SecretAgentDefinition = {
+  ...createBase2('fast'),
+  id: 'base2-with-files-input',
+  displayName: 'Buffy the Fast Orchestrator',
+  // Overrides on top of createBase2('fast'): the input schema and the step handler.
+  inputSchema: {
+    prompt: {
+      type: 'string',
+      description: 'A coding task to complete',
+    },
+    params: {
+      type: 'object',
+      properties: {
+        maxContextLength: {
+          type: 'number',
+        },
+        filesToRead: {
+          type: 'array',
+          items: {
+            type: 'string',
+          },
+        },
+      },
+      required: ['filesToRead'],
+    },
+  },
+  // Reads params.filesToRead first, then spawns context-pruner before every step.
+  handleSteps: function* ({ params }) {
+    yield {
+      toolName: 'read_files',
+      input: { paths: params?.filesToRead || [] },
+    }
+
+    // Keep stepping until a 'STEP' yield comes back with stepsComplete set.
+    while (true) {
+      // Run context-pruner before each step
+      // NOTE(review): 'as any' presumably sidesteps the yield type for includeToolCall - confirm
+      yield {
+        toolName: 'spawn_agent_inline',
+        input: {
+          agent_type: 'context-pruner',
+          params: params ?? {},
+        },
+        includeToolCall: false,
+      } as any
+
+      const { stepsComplete } = yield 'STEP'
+      if (stepsComplete) break
+    }
+  },
+}
+export default definition
diff --git a/.agents/orchestrator/iterative-orchestrator/iterative-orchestrator-step.ts b/.agents/orchestrator/iterative-orchestrator/iterative-orchestrator-step.ts
@@ -37,15 +37,27 @@ const definition: SecretAgentDefinition = {
           type: 'object',
           properties: {
             title: { type: 'string' },
-            prompt: { type: 'string' },
+            prompt: {
+              type: 'string',
+              description:
+                'The exact prompt that will be sent to the agent that will implement or decide the step',
+            },
             type: { type: 'string', enum: ['implementation', 'decision'] },
-            successCriteria: { type: 'array', items: { type: 'string' } },
-            filesToReadHints: { type: 'array', items: { type: 'string' } },
+            filesToReadHints: {
+              type: 'array',
+              items: { type: 'string' },
+              description:
+                'Include paths to files that will help the agent implement or decide the step',
+            },
           },
           required: ['title', 'prompt', 'type'],
         },
       },
-      notes: { type: 'string' },
+      notes: {
+        type: 'string',
+        description:
+          'Any notes for the future orchestrator agent. What you want to accomplish with these steps, why you chose them, and what you want to accomplish next. Also, estimate the remaining number of steps needed to complete the task.',
+      },
     },
     required: ['isDone', 'nextSteps', 'notes'],
   },
@@ -61,20 +73,12 @@ Important: you *must* make at least one tool call, via <codebuff_tool_call> synt
 - If only one step is needed next, return a single-item array.
 - Mark isDone=true only when the overall task is truly complete.
 
-Return JSON via set_output with:
-{
-  isDone: boolean,
-  nextSteps: [
-    {
-      title: string,
-      prompt: string,           // exact prompt to give to the implementor or decision maker
-      type: 'implementation' | 'decision',  // whether this is a coding task or decision
-      successCriteria?: string[] // 3-6 bullet checks that show this step is done
-      filesToReadHints?: string[] // optional globs/paths hints
-    }
-  ],
-  notes: string             // short rationale for these steps
-}
+## Guidelines
+- It's better to make small changes at a time and validate them as you go. Writing a lot of code without testing it or typechecking it or validating it in some way is not good!
+- Keep the scope of your changes as small as possible.
+- Try to complete your task in as few steps as possible.
+- There is a time limit on the number of steps you can take. If you reach the limit, you will be cut off prematurely before the task is complete.
+- Prefer not to parallelize steps if they are at all related, because you can get a better result by doing them sequentially.
 `,
   stepPrompt: `Important: you *must* make at least one tool call, via <codebuff_tool_call> syntax, in every response message!`,
 }
diff --git a/.agents/orchestrator/iterative-orchestrator/iterative-orchestrator.ts b/.agents/orchestrator/iterative-orchestrator/iterative-orchestrator.ts
@@ -9,7 +9,6 @@ type StepInfo = {
   title: string
   prompt: string
   type: 'implementation' | 'decision'
-  successCriteria?: string[]
   filesToReadHints?: string[]
 }
 
@@ -22,7 +21,7 @@ const definition: SecretAgentDefinition = {
     'Orchestrates the completion of a large task through batches of independent steps.',
   outputMode: 'structured_output',
   toolNames: ['spawn_agents', 'set_output'],
-  spawnableAgents: ['iterative-orchestrator-step', 'base2-fast'],
+  spawnableAgents: ['iterative-orchestrator-step', 'base2-with-files-input'],
 
   inputSchema: {
     prompt: { type: 'string', description: 'Overall task to complete' },
@@ -38,8 +37,10 @@ const definition: SecretAgentDefinition = {
     }[] = []
     let completed = false
     let iteration = 0
+    const maxIterations = 15
 
-    while (!completed) {
+    while (!completed && iteration < maxIterations) {
+      const remainingIterations = maxIterations - iteration
       iteration++
       // 1) Plan next step
       const planningBundle = [
@@ -93,18 +94,23 @@ const definition: SecretAgentDefinition = {
         break
       }
 
+      // Iteration-budget note appended to each spawned step prompt; sterner wording for the last 5.
+      const reminder =
+        remainingIterations <= 5
+          ? `<reminder>You are approaching the MAXIMUM NUMBER OF ITERATIONS! You have ${remainingIterations} iterations left to complete the task, or at least get it into a working state. You must try to wrap up the task in the remaining iterations or be cut off!</reminder>`
+          : `<reminder>You have ${remainingIterations} steps left to complete the task.</reminder>`
       // 3) Execute all steps in parallel
       const executionAgents = steps.map((step) => {
         if (step.type === 'decision') {
           return {
-            agent_type: 'base2-fast',
-            prompt: `DECISION TASK: ${step.prompt}\n\nThis is a decision-making step, not an implementation step. Your job is to research options, analyze trade-offs, and make a clear recommendation with rationale. Write out your decision in the last message. Do not create a file with your decision.`,
+            agent_type: 'base2-with-files-input',
+            prompt: `DECISION TASK: ${step.prompt}\n\nThis is a decision-making step, not an implementation step. Your job is to research options, analyze trade-offs, and make a clear recommendation with rationale. Write out your decision in the last message. Do not create a file with your decision. ${reminder}`,
             params: { filesToRead: step.filesToReadHints || [] },
           }
         } else {
           return {
-            agent_type: 'base2-fast',
-            prompt: step.prompt,
+            agent_type: 'base2-with-files-input',
+            prompt: `${step.prompt}\n\n${reminder}`,
             params: { filesToRead: step.filesToReadHints || [] },
           }
         }
diff --git a/common/src/types/contracts/database.ts b/common/src/types/contracts/database.ts
@@ -77,6 +77,7 @@ export type FinishAgentRunFn = (params: {
 }) => Promise<void>
 
 export type AddAgentStepFn = (params: {
+  apiKey: string
   userId: string | undefined
   agentRunId: string
   stepNumber: number
@@ -87,6 +88,6 @@ export type AddAgentStepFn = (params: {
   errorMessage?: string
   startTime: Date
   logger: Logger
-}) => Promise<string>
+}) => Promise<string | null>
 
 export type DatabaseAgentCache = Map<string, AgentTemplate | null>
diff --git a/npm-app/release/package.json b/npm-app/release/package.json
@@ -1,6 +1,6 @@
 {
   "name": "codebuff",
-  "version": "1.0.502",
+  "version": "1.0.503",
   "description": "AI coding agent",
   "license": "MIT",
   "bin": {
diff --git a/packages/agent-runtime/src/run-agent-step.ts b/packages/agent-runtime/src/run-agent-step.ts
@@ -450,6 +450,20 @@ export async function loopAgentSteps(
     ParamsExcluding<
       FinishAgentRunFn,
       'runId' | 'status' | 'totalSteps' | 'directCredits' | 'totalCredits'
+    > &
+    ParamsExcluding<
+      typeof runAgentStep,
+      'agentState' | 'prompt' | 'spawnParams' | 'system'
+    > &
+    ParamsExcluding<
+      AddAgentStepFn,
+      | 'agentRunId'
+      | 'stepNumber'
+      | 'credits'
+      | 'childRunIds'
+      | 'messageId'
+      | 'status'
+      | 'startTime'
     >,
 ): Promise<{
   agentState: AgentState
@@ -677,14 +691,6 @@ export async function loopAgentSteps(
         messageId,
       } = await runAgentStep({
         ...params,
-        userId,
-        userInputId,
-        clientSessionId,
-        fingerprintId,
-        onResponseChunk,
-        localAgentTemplates,
-        agentType,
-        fileContext,
         agentState: currentAgentState,
         prompt: currentPrompt,
         spawnParams: currentParams,
@@ -693,15 +699,14 @@ export async function loopAgentSteps(
 
       if (newAgentState.runId) {
         await addAgentStep({
-          userId,
+          ...params,
           agentRunId: newAgentState.runId,
           stepNumber: totalSteps,
           credits: newAgentState.directCreditsUsed - creditsBefore,
           childRunIds: newAgentState.childRunIds.slice(childrenBefore),
           messageId,
           status: 'completed',
           startTime,
-          logger,
         })
       } else {
         logger.error('No runId found for agent state after finishing agent run')
diff --git a/sdk/package.json b/sdk/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@codebuff/sdk",
   "private": false,
-  "version": "0.4.4",
+  "version": "0.4.5",
   "description": "Official SDK for Codebuff — AI coding agent & framework",
   "license": "Apache-2.0",
   "type": "module",
diff --git a/sdk/src/impl/agent-runtime.ts b/sdk/src/impl/agent-runtime.ts
@@ -2,6 +2,7 @@ import { trackEvent } from '@codebuff/common/analytics'
 import { success } from '@codebuff/common/util/error'
 
 import {
+  addAgentStep,
   fetchAgentFromDatabase,
   finishAgentRun,
   getUserInfoFromApiKey,
@@ -12,10 +13,13 @@ import type {
   AgentRuntimeDeps,
   AgentRuntimeScopedDeps,
 } from '@codebuff/common/types/contracts/agent-runtime'
+import type { Logger } from '@codebuff/common/types/contracts/logger'
 
-export const CLI_AGENT_RUNTIME_IMPL: Omit<
+export function getAgentRuntimeImpl(params: {
+  logger?: Logger
+  apiKey: string
+}): Omit<
   AgentRuntimeDeps & AgentRuntimeScopedDeps,
-  | 'addAgentStep'
   | 'promptAiSdkStream'
   | 'promptAiSdk'
   | 'promptAiSdkStructured'
@@ -26,50 +30,54 @@ export const CLI_AGENT_RUNTIME_IMPL: Omit<
   | 'requestOptionalFile'
   | 'sendAction'
   | 'sendSubagentChunk'
-> = {
-  // Database
-  getUserInfoFromApiKey,
-  fetchAgentFromDatabase,
-  startAgentRun,
-  finishAgentRun,
-  // addAgentStep: AddAgentStepFn
+> {
+  const { logger, apiKey } = params
+
+  return {
+    // Database
+    getUserInfoFromApiKey,
+    fetchAgentFromDatabase,
+    startAgentRun,
+    finishAgentRun,
+    addAgentStep,
 
-  // Billing
-  consumeCreditsWithFallback: async () =>
-    success({
-      chargedToOrganization: false,
-    }),
+    // Billing
+    consumeCreditsWithFallback: async () =>
+      success({
+        chargedToOrganization: false,
+      }),
 
-  // LLM
-  // promptAiSdkStream: PromptAiSdkStreamFn,
-  // promptAiSdk: PromptAiSdkFn,
-  // promptAiSdkStructured: PromptAiSdkStructuredFn,
+    // LLM
+    // promptAiSdkStream: PromptAiSdkStreamFn,
+    // promptAiSdk: PromptAiSdkFn,
+    // promptAiSdkStructured: PromptAiSdkStructuredFn,
 
-  // Mutable State
-  databaseAgentCache: new Map(),
-  liveUserInputRecord: {},
-  sessionConnections: {},
+    // Mutable State
+    databaseAgentCache: new Map(),
+    liveUserInputRecord: {},
+    sessionConnections: {},
 
-  // Analytics
-  trackEvent,
+    // Analytics
+    trackEvent,
 
-  // Other
-  logger: {
-    info: () => {},
-    debug: () => {},
-    warn: () => {},
-    error: () => {},
-  },
-  fetch: globalThis.fetch,
+    // Other
+    logger: logger ?? {
+      info: () => {},
+      debug: () => {},
+      warn: () => {},
+      error: () => {},
+    },
+    fetch: globalThis.fetch,
 
-  // Client (WebSocket)
-  // handleStepsLogChunk: HandleStepsLogChunkFn,
-  // requestToolCall: RequestToolCallFn,
-  // requestMcpToolData: RequestMcpToolDataFn,
-  // requestFiles: RequestFilesFn,
-  // requestOptionalFile: RequestOptionalFileFn,
-  // sendAction: SendActionFn,
-  // sendSubagentChunk: SendSubagentChunkFn,
+    // Client (WebSocket)
+    // handleStepsLogChunk: HandleStepsLogChunkFn,
+    // requestToolCall: RequestToolCallFn,
+    // requestMcpToolData: RequestMcpToolDataFn,
+    // requestFiles: RequestFilesFn,
+    // requestOptionalFile: RequestOptionalFileFn,
+    // sendAction: SendActionFn,
+    // sendSubagentChunk: SendSubagentChunkFn,
 
-  apiKey: process.env.CODEBUFF_API_KEY ?? '',
+    apiKey,
+  }
 }
diff --git a/sdk/src/impl/database.ts b/sdk/src/impl/database.ts
diff --git a/sdk/src/tools/run-terminal-command.ts b/sdk/src/tools/run-terminal-command.ts
diff --git a/sdk/test/tree-sitter-queries/package-lock.json b/sdk/test/tree-sitter-queries/package-lock.json

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "codebuff",`
`3`		`- "version": "1.0.502",`
	`3`	`+ "version": "1.0.503",`
`4`	`4`	`"description": "AI coding agent",`
`5`	`5`	`"license": "MIT",`
`6`	`6`	`"bin": {`
Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,7 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "@codebuff/sdk",`
`3`	`3`	`"private": false,`
`4`		`- "version": "0.4.4",`
	`4`	`+ "version": "0.4.5",`
`5`	`5`	`"description": "Official SDK for Codebuff — AI coding agent & framework",`
`6`	`6`	`"license": "Apache-2.0",`
`7`	`7`	`"type": "module",`