diff --git a/__tests__/context.test.ts b/__tests__/context.test.ts index 52dae1fe..e9341f72 100644 --- a/__tests__/context.test.ts +++ b/__tests__/context.test.ts @@ -354,21 +354,35 @@ export function validateEmail(email: string): boolean { expect(parsed.nodes).toBeDefined(); }); - it('should truncate long code blocks', async () => { - const result = await cg.buildContext('PaymentService', { + it('should not drop tail lines from a selected long code block', async () => { + const srcDir = path.join(testDir, 'src'); + const bodyLines = [ + 'export class LongService {', + ' run(): string {', + " const start = 'START_MARKER';", + ]; + for (let i = 0; i < 80; i++) { + bodyLines.push(` const filler${i} = '${i}';`); + } + bodyLines.push(" const finish = 'IMPORTANT_TAIL_MARKER';"); + bodyLines.push(' return start + finish;'); + bodyLines.push(' }'); + bodyLines.push('}'); + fs.writeFileSync(path.join(srcDir, 'long-service.ts'), bodyLines.join('\n')); + await cg.indexAll(); + + const result = await cg.buildContext('LongService run', { format: 'markdown', - maxCodeBlockSize: 100, + maxCodeBlockSize: 260, includeCode: true, + maxCodeBlocks: 1, }); const markdown = result as string; - // Long code blocks should be truncated - if (markdown.includes('```typescript')) { - // If there's a code block, check for truncation marker if content was long - // This test validates the truncation logic works - expect(typeof markdown).toBe('string'); - } + expect(markdown).toContain('START_MARKER'); + expect(markdown).toContain('IMPORTANT_TAIL_MARKER'); + expect(markdown).toContain('... 
(truncated middle) ...'); }); }); }); diff --git a/__tests__/symbol-lookup.test.ts b/__tests__/symbol-lookup.test.ts index c81aaabd..67ff0b2b 100644 --- a/__tests__/symbol-lookup.test.ts +++ b/__tests__/symbol-lookup.test.ts @@ -181,7 +181,26 @@ describe.skipIf(!HAS_SQLITE)('matchesSymbol — dotted lookups (regression for # fs.mkdirSync(src, { recursive: true }); fs.writeFileSync( path.join(src, 'session.ts'), - `export class Session {\n request(): void { fetch('x'); }\n}\nexport function request(): void {}\n` + `export class Session { + request(): void { + const marker = 'SESSION_BODY_MARKER'; + fetch(marker); + } +} +export function request(): void {} +` + ); + fs.writeFileSync( + path.join(src, 'large-session.ts'), + `export class LargeSession { + run(): void { + const start = 'LARGE_BODY_START_MARKER'; +${Array.from({ length: 650 }, (_, i) => ` const filler${i} = '${'x'.repeat(30)}';`).join('\n')} + const tail = 'LARGE_BODY_TAIL_MARKER'; + console.log(start, tail); + } +} +` ); const CodeGraph = (await import('../src/index')).default; @@ -219,4 +238,22 @@ describe.skipIf(!HAS_SQLITE)('matchesSymbol — dotted lookups (regression for # expect(text).toMatch(/\(function\)/); expect((text.match(/\*\*Location:\*\*/g) || []).length).toBeGreaterThanOrEqual(2); }); + + it('codegraph_node includeCode returns container bodies, not only member outlines', async () => { + const res = await handler.execute('codegraph_node', { symbol: 'Session', includeCode: true }); + const text = res.content?.[0]?.text ?? ''; + expect(text).toContain('SESSION_BODY_MARKER'); + expect(text).not.toContain('Structural outline only'); + }); + + it('codegraph_node trims large container bodies from the middle before final output truncation', async () => { + const res = await handler.execute('codegraph_node', { symbol: 'LargeSession', includeCode: true }); + const text = res.content?.[0]?.text ?? 
/**
 * Marker inserted where the middle of a block was removed. Deliberately
 * language-neutral (no `//` or `#`): it is rendered inside a fenced source
 * block whose language varies.
 */
const MIDDLE_MARKER = '\n... (truncated middle) ...\n';

/** Marker appended when the budget is too small to keep both a head and a tail. */
const TAIL_MARKER = '\n... (truncated) ...';

/**
 * Shorten `code` to roughly `maxLength` characters by removing its middle,
 * keeping the beginning and the end (where signatures and return values
 * usually live).
 *
 * - Input that already fits is returned unchanged.
 * - With enough budget, the result is `head + MIDDLE_MARKER + tail`, never
 *   longer than `maxLength`, with both cuts moved to line boundaries when a
 *   boundary is reasonably close.
 * - With a very small budget (not enough room for the middle marker plus a
 *   useful amount of code) the result is the first `maxLength` characters
 *   followed by `TAIL_MARKER`; this legacy form may exceed `maxLength` by the
 *   marker's length.
 *
 * @param code      Source text to bound.
 * @param maxLength Character budget (UTF-16 code units). Fractions are floored,
 *                  negative values are treated as 0, and `NaN` is treated as
 *                  "no limit".
 * @returns The original text, or a shortened copy containing a truncation marker.
 */
export function trimCodeBlockMiddle(code: string, maxLength: number): string {
  // NaN carries no usable limit; without this guard every comparison below
  // is false and the function would return the marker plus the entire input.
  if (Number.isNaN(maxLength)) {
    return code;
  }

  // A negative end index makes String.prototype.slice count from the end of
  // the string, which would keep almost everything. Clamp instead.
  const limit = Math.max(0, Math.floor(maxLength));
  if (code.length <= limit) {
    return code;
  }

  if (limit <= MIDDLE_MARKER.length + TAIL_MARKER.length) {
    return code.slice(0, alignEndToCodePoint(code, limit)) + TAIL_MARKER;
  }

  const available = limit - MIDDLE_MARKER.length;
  const headTarget = Math.floor(available / 2);
  const tailTarget = available - headTarget;
  const head = sliceHeadAtLineBoundary(code, headTarget);
  const tail = sliceTailAtLineBoundary(code, tailTarget);
  // The marker supplies its own newlines; strip adjacent ones to avoid blank lines.
  return head.replace(/\n+$/, '') + MIDDLE_MARKER + tail.replace(/^\n+/, '');
}

/**
 * First `maxChars` characters of `code`, cut back to the last complete line
 * when that line break lies in the second half of the slice (so the cut never
 * throws away more than half of the budget).
 */
function sliceHeadAtLineBoundary(code: string, maxChars: number): string {
  const end = alignEndToCodePoint(code, maxChars);
  const head = code.slice(0, end);
  // The cut already sits at the end of a line (or of the input): keep it all.
  if (end >= code.length || code.charAt(end) === '\n') {
    return head;
  }
  const lineEnd = head.lastIndexOf('\n');
  return lineEnd > maxChars * 0.5 ? head.slice(0, lineEnd) : head;
}

/**
 * Last `maxChars` characters of `code`, advanced to the start of the next
 * line when that line start lies in the first half of the slice.
 */
function sliceTailAtLineBoundary(code: string, maxChars: number): string {
  const start = alignStartToCodePoint(code, Math.max(0, code.length - maxChars));
  const tail = code.slice(start);
  // The cut already sits at the start of a line: dropping up to the next
  // newline would discard a complete line for nothing.
  if (start === 0 || code.charAt(start - 1) === '\n') {
    return tail;
  }
  const lineStart = tail.indexOf('\n');
  return lineStart >= 0 && lineStart < maxChars * 0.5 ? tail.slice(lineStart + 1) : tail;
}

/** True for the first code unit of a UTF-16 surrogate pair. */
function isHighSurrogate(unit: number): boolean {
  return unit >= 0xd800 && unit <= 0xdbff;
}

/** True for the second code unit of a UTF-16 surrogate pair. */
function isLowSurrogate(unit: number): boolean {
  return unit >= 0xdc00 && unit <= 0xdfff;
}

/** Move an exclusive end index back by one if it would split a surrogate pair. */
function alignEndToCodePoint(code: string, end: number): number {
  const splitsPair =
    end > 0 &&
    end < code.length &&
    isHighSurrogate(code.charCodeAt(end - 1)) &&
    isLowSurrogate(code.charCodeAt(end));
  return splitsPair ? end - 1 : end;
}

/** Move a start index forward by one if it would split a surrogate pair. */
function alignStartToCodePoint(code: string, start: number): number {
  const splitsPair =
    start > 0 &&
    start < code.length &&
    isLowSurrogate(code.charCodeAt(start)) &&
    isHighSurrogate(code.charCodeAt(start - 1));
  return splitsPair ? start + 1 : start;
}
* Bounds memory and CPU when a buggy or hostile MCP client sends a @@ -58,16 +62,6 @@ const MAX_PATH_LENGTH = 4_096; */ const RUST_PATH_PREFIXES = new Set(['crate', 'super', 'self']); -/** - * Node kinds that contain other symbols. For these, `codegraph_node` with - * `includeCode=true` returns a structural outline (member names + signatures - * + line numbers) instead of the full body, which for a large class is a - * multi-thousand-character wall of source that bloats the agent's context. - */ -const CONTAINER_NODE_KINDS = new Set([ - 'class', 'struct', 'interface', 'trait', 'protocol', 'enum', 'namespace', 'module', -]); - /** Last `::` / `.` / `/`-separated segment of a qualified symbol. */ function lastQualifierPart(symbol: string): string { const parts = symbol.split(/::|[./]/).filter((p) => p.length > 0); @@ -2622,20 +2616,10 @@ export class ToolHandler { /** Render one symbol: details + (optional) body/outline + its caller/callee trail. */ private async renderNodeSection(cg: CodeGraph, node: Node, includeCode: boolean): Promise { let code: string | null = null; - let outline: string | null = null; if (includeCode) { - // For container symbols (class/interface/struct/…), the full body is the - // sum of every method body — a wall of source. Return a structural outline - // (members + signatures + line numbers) instead; leaf symbols return their - // full body. 
- if (CONTAINER_NODE_KINDS.has(node.kind)) { - outline = this.buildContainerOutline(cg, node); - } - if (!outline) { - code = await cg.getCode(node.id); - } + code = await cg.getCode(node.id); } - return this.formatNodeDetails(node, code, outline) + this.formatTrail(cg, node); + return this.formatNodeDetails(node, code) + this.formatTrail(cg, node); } /** @@ -3197,29 +3181,7 @@ export class ToolHandler { return lines.join('\n'); } - /** - * Build a compact structural outline of a container symbol from its - * indexed children (methods, fields, properties, …) — name, kind, - * line number, and signature — so the agent gets the shape of a class - * without the full source of every method. Returns '' when the container - * has no indexed children, so the caller can fall back to full source. - */ - private buildContainerOutline(cg: CodeGraph, node: Node): string { - const children = cg.getChildren(node.id) - .filter(c => c.kind !== 'import' && c.kind !== 'export') - .sort((a, b) => (a.startLine ?? 0) - (b.startLine ?? 0)); - if (children.length === 0) return ''; - - const lines = [`**Members (${children.length}):**`, '']; - for (const c of children) { - const loc = c.startLine ? `:${c.startLine}` : ''; - const sig = c.signature ? ` — \`${c.signature}\`` : ''; - lines.push(`- ${c.name} (${c.kind})${loc}${sig}`); - } - return lines.join('\n'); - } - - private formatNodeDetails(node: Node, code: string | null, outline?: string | null): string { + private formatNodeDetails(node: Node, code: string | null): string { const location = node.startLine ? `:${node.startLine}` : ''; const lines: string[] = [ `## ${node.name} (${node.kind})`, @@ -3236,14 +3198,12 @@ export class ToolHandler { lines.push('', node.docstring); } - if (outline) { - lines.push('', outline, '', - `> Structural outline only. 
Read \`${node.filePath}\` or call codegraph_node on a specific member for its body.`); - } else if (code) { + if (code) { // Line-numbered (cat -n style, like codegraph_explore and Read) so the // agent can cite/edit exact lines without re-Reading the file for them. const numbered = node.startLine ? numberSourceLines(code, node.startLine) : code; - lines.push('', '```' + node.language, numbered, '```'); + const bounded = trimCodeBlockMiddle(numbered, NODE_CODE_BLOCK_MAX_LENGTH); + lines.push('', '```' + node.language, bounded, '```'); } return lines.join('\n');