Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions docs/context.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Coding Code 采用两层压缩策略,在不同阈值下自动触发:
| 触发阈值 | `promptEstimate > modelMaxTokens * 0.9` | prompt 估算超过模型最大 token 90% 时触发 |
| 保留最近 turn | 1 | 保留最近 1 个 turn 不压缩 |
| 压缩方式 | 调用 LLM 生成摘要 | 输出 `<summary>...</summary>` 块 |
| 增量压缩 | 是 | 找到已有 SummaryEvent,只压缩 `lastSummarizedTurnId` 之后的事件 |
| 增量压缩 | 是 | 找到已有 SummaryEvent,只压缩 `endTurnId` 之后的事件 |
| 失败追踪 | 连续 3 次失败后停止 | 24 小时 TTL 后重置 |

---
Expand Down Expand Up @@ -90,17 +90,15 @@ interface CompactEvent {
uuid: string;
startTurnId: number;
endTurnId: number;
timestamp: string;
}

// LLM 压缩摘要事件
interface SummaryEvent {
type: 'summary';
uuid: string;
replaces: string[]; // 被替换的事件 UUID 列表
summaryText: string; // 摘要文本
lastSummarizedTurnId: number; // 最后压缩到的 turn ID
timestamp: string;
startTurnId: number; // 摘要覆盖的起始 turn ID
endTurnId: number; // 摘要覆盖的结束 turn ID
summaryText: string; // 摘要文本
}
```

Expand Down
42 changes: 16 additions & 26 deletions packages/codingcode/src/agent/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,12 +128,12 @@
const hooks = yield* HookService;
const mcp = yield* McpService;
const checkpoint = yield* CheckpointService;
const approval = yield* ApprovalService;

Check warning on line 131 in packages/codingcode/src/agent/agent.ts

View workflow job for this annotation

GitHub Actions / lint

'approval' is assigned a value but never used. Allowed unused vars must match /^_/u
const skills = yield* SkillService;
const runtime = yield* ProjectRuntimeService;
const todo = yield* TodoService;

Check warning on line 134 in packages/codingcode/src/agent/agent.ts

View workflow job for this annotation

GitHub Actions / lint

'todo' is assigned a value but never used. Allowed unused vars must match /^_/u
const rules = yield* RulesService;
const context = yield* ContextService;

Check warning on line 136 in packages/codingcode/src/agent/agent.ts

View workflow job for this annotation

GitHub Actions / lint

'context' is assigned a value but never used. Allowed unused vars must match /^_/u
const memory = yield* MemoryService;
const factory = yield* LLMFactoryService;

Expand Down Expand Up @@ -253,16 +253,18 @@

const config = getContextConfig();
const maxOverflowRetries = REACTIVE_COMPACT_MAX_RETRIES;
const model = state.sessionMeta?.model ?? 'unknown';
const effectiveMaxSteps = opts.maxStepsOverride ?? maxSteps;

let stopContinuations = 0;
const effectiveMaxStopContinuations = opts.maxStopContinuations ?? maxStopContinuations;

let messages: Message[] = [];

for (let attempt = 0; attempt <= maxOverflowRetries; attempt++) {
const { messages } = yield* Effect.sync(() =>
const payload = yield* Effect.sync(() =>
context.assemblePayload(state.sessionId, state.projectPath, config, llm.modelInfo.maxTokens)
);
messages = payload.messages;

let lastResult: Result<string, AgentError> | null = null;
let overflow = false;
Expand Down Expand Up @@ -309,26 +311,17 @@
),
catch: (e) => new AgentError('LLM_FAILED', String(e)),
});
if (compressResult.didCompress) {
if (compressResult.didCompress && compressResult.messages) {
yield* q.offer({
_tag: 'ReactiveCompact',
attempt: 1,
released: compressResult.released,
promptEstimate: compressResult.promptEstimate,
});

const rebuilt = yield* Effect.sync(() =>
context.assemblePayload(
state.sessionId,
state.projectPath,
config,
llm.modelInfo.maxTokens
)
);
messages.length = 0;
messages.push(...rebuilt.messages);
messages = compressResult.messages;
state.usage = undefined;
state.promptEstimate = rebuilt.promptEstimate;
state.promptEstimate = compressResult.promptEstimate;
}

const llmMessages = [...messages];
Expand Down Expand Up @@ -364,15 +357,18 @@
context.compactWithLLM(
state.sessionId,
state.projectPath,
messages,
config,
null,
undefined,
undefined,
llm,
undefined,
llm.modelInfo.maxTokens
),
catch: (e) => new AgentError('LLM_FAILED', String(e)),
});
if (compressResult.didCompress && compressResult.messages) {
messages = compressResult.messages;
state.promptEstimate = compressResult.promptEstimate;
}
yield* q.offer({
_tag: 'ReactiveCompact',
attempt: attempt + 1,
Expand Down Expand Up @@ -411,7 +407,7 @@

if (!toolCalls || toolCalls.length === 0) {
if (session) {
yield* session.recordAssistant(state, resp.content, toolCalls || [], model, resp.usage);
yield* session.recordAssistant(state, resp.content, toolCalls || [], resp.usage);
}
const stopDecision = yield* hooks.emitDecision('agent.turn.stop', {
sessionId,
Expand Down Expand Up @@ -467,13 +463,7 @@
}
}

const record = yield* session.recordAssistant(
state,
resp.content,
toolCalls!,
model,
resp.usage
);
const record = yield* session.recordAssistant(state, resp.content, toolCalls!, resp.usage);

Check warning on line 466 in packages/codingcode/src/agent/agent.ts

View workflow job for this annotation

GitHub Actions / lint

'record' is assigned a value but never used. Allowed unused vars must match /^_/u
const allResults = yield* executor.executeBatch(toolCalls, state.sessionId, {
turnId: state.currentTurnId,
projectPath,
Expand All @@ -485,7 +475,7 @@
let todoPrinted = false;
for (const r of allResults) {
const resultOut = r.type === 'denied' ? '' : r.output;
yield* session.recordToolResult(state, record.uuid, r.name, r.id, resultOut);
yield* session.recordToolResult(state, r.name, r.id, resultOut);
if (r.type === 'denied') {
yield* q.offer({ _tag: 'ToolDenied', id: r.id, name: r.name, reason: r.reason });
} else {
Expand Down
3 changes: 2 additions & 1 deletion packages/codingcode/src/client/direct/agent-runtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,9 @@ export function createDirectAgentClient(llm: LLMClient, rt: AppRuntime): AgentRu
await rt.runPromise(
Effect.gen(function* () {
const context = yield* ContextService;
const { messages } = context.assemblePayload(sessionId, cwd, getContextConfig());
return yield* Effect.promise(() =>
context.compactWithLLM(sessionId, cwd, getContextConfig(), null)
context.compactWithLLM(sessionId, cwd, messages, getContextConfig(), null)
);
})
);
Expand Down
3 changes: 2 additions & 1 deletion packages/codingcode/src/client/direct/sessions.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Effect } from 'effect';
import { Effect } from 'effect';
import { SessionService } from '../../session/store.js';
import { WorkspaceService } from '../../core/workspace.js';
import { deleteSession } from '../../session/file-ops.js';
Expand All @@ -21,6 +21,7 @@ export interface SessionClient {
resumeSession(input: { sessionId: string; cwd: string }): Promise<SessionEvent[]>;
listSessions(input: { cwd: string }): Promise<SessionIndex[]>;
getSessionHistory(input: { sessionId: string }): Promise<SessionEvent[]>;

deleteSession(input: { sessionId: string }): Promise<void>;
getSessionPermissionMode(input: { sessionId: string }): Promise<PermissionMode>;
setSessionPermissionMode(input: { sessionId: string; mode: PermissionMode }): Promise<void>;
Expand Down
81 changes: 45 additions & 36 deletions packages/codingcode/src/context/service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
const MICRO_COMPACT_MIN_CHARS = 120;
const COMPACTION_THRESHOLD = 0.9;
const KEEP_RECENT_TURNS = 1;
const REACTIVE_COMPACT_MAX_RETRIES = 3;

Check warning on line 36 in packages/codingcode/src/context/service.ts

View workflow job for this annotation

GitHub Actions / lint

'REACTIVE_COMPACT_MAX_RETRIES' is assigned a value but never used. Allowed unused vars must match /^_/u

export class ContextService extends Effect.Service<ContextService>()('Context', {
effect: Effect.gen(function* () {
Expand Down Expand Up @@ -64,8 +64,12 @@
const idx = session.findSessionIndexProxy(sessionId);
const currentTurnId = idx?.currentTurnId ?? 0;

const { hidden, compactedTurnIds: initialCompactedTurnIds } = applyVisibilityEvents(events);
let visible = filterVisible(events, hidden);
const {
hiddenTurnIds,
hiddenOpUuids,
compactedTurnIds: initialCompactedTurnIds,
} = applyVisibilityEvents(events);
let visible = filterVisible(events, hiddenTurnIds, hiddenOpUuids);
let compactedTurnIds = initialCompactedTurnIds;

const preEstimate = estimateTokensFromEvents(visible);
Expand All @@ -82,7 +86,7 @@
if (didCompact) {
events = session.readHistoryFile(jsonlPath);
const updated = applyVisibilityEvents(events);
visible = filterVisible(events, updated.hidden);
visible = filterVisible(events, updated.hiddenTurnIds, updated.hiddenOpUuids);
compactedTurnIds = updated.compactedTurnIds;
}

Expand All @@ -96,11 +100,17 @@
};
};

function filterVisible(events: SessionEvent[], hidden: Set<string>): SessionEvent[] {
function filterVisible(
events: SessionEvent[],
hiddenTurnIds: Set<number>,
hiddenOpUuids: Set<string>
): SessionEvent[] {
return events.filter((ev) => {
if (ev.type === 'hide' || ev.type === 'unhide') return false;
if (ev.type === 'compact') return false;
if ('uuid' in ev && hidden.has((ev as any).uuid)) return false;
if (ev.type === 'session_meta') return false;
if (ev.type === 'rollback') return false;
if (ev.type === 'summary' && hiddenOpUuids.has(ev.uuid)) return false;
if (ev.type === 'compact' && hiddenOpUuids.has(ev.uuid)) return false;
if ('turnId' in ev && hiddenTurnIds.has(ev.turnId)) return false;
return true;
}) as SessionEvent[];
}
Expand Down Expand Up @@ -145,7 +155,6 @@
uuid: randomUUID(),
startTurnId,
endTurnId,
timestamp: new Date().toISOString(),
};
appendLine(jsonlPath, compactEvent);
return true;
Expand All @@ -161,9 +170,7 @@
messages: Message[],
modelMaxTokens: number,
config: ContextConfig,
llm: LLMClient | null,
compactedEvents?: SessionEvent[],
currentTurnId?: number
llm: LLMClient | null
): Promise<CompressResult> => {
const promptEstimate = estimateTokens(messages);
const failures = getFailures(sessionId);
Expand All @@ -179,10 +186,9 @@
const result = await compactWithLLM(
sessionId,
encodedProjectPath,
messages,
config,
llm,
compactedEvents,
currentTurnId,
promptEstimate,
modelMaxTokens
);
Expand All @@ -199,38 +205,46 @@
const compactWithLLM = async (
sessionId: string,
encodedProjectPath: string,
messages: Message[],
config: ContextConfig,
llm: LLMClient | null,
compactedEvents?: SessionEvent[],
currentTurnId?: number,
usage?: number,
modelMaxTokens?: number
): Promise<CompressResult> => {
const payload = assemblePayload(sessionId, encodedProjectPath, config, modelMaxTokens);
if (!compactedEvents || currentTurnId === undefined) {
compactedEvents = payload.compactedEvents;
currentTurnId = payload.currentTurnId;
}

let released = 0;

const threshold = modelMaxTokens ? modelMaxTokens * COMPACTION_THRESHOLD : Infinity;
if (usage === undefined || usage - released > threshold) {
const { compactedEvents, currentTurnId, compactedTurnIds } = assemblePayload(
sessionId,
encodedProjectPath,
config,
modelMaxTokens
);
released += await tryCompaction(
sessionId,
config,
llm,
compactedEvents,
currentTurnId,
payload.compactedTurnIds
compactedTurnIds
);
}

if (released <= 0) {
return {
didCompress: false,
released: 0,
promptEstimate: usage ?? estimateTokens(messages),
};
}

const postPayload = assemblePayload(sessionId, encodedProjectPath, config, modelMaxTokens);
return {
didCompress: released > 0,
didCompress: true,
released,
promptEstimate: estimateTokens(postPayload.messages),
messages: postPayload.messages,
};
};

Expand Down Expand Up @@ -270,23 +284,18 @@
const summary = await callLLMForCompaction(msgs, compactionLlm, config);
if (!summary) return 0;

const replacedUuids: string[] = [];
for (const ev of targetEvents) {
if ('uuid' in (ev as any)) replacedUuids.push((ev as any).uuid);
}

const lastTurnId = Math.max(
...targetEvents.filter((e) => 'turnId' in e).map((e) => (e as any).turnId),
0
);
const turnIds = targetEvents
.filter((e) => 'turnId' in e)
.map((e) => (e as any).turnId as number);
const startTurnId = Math.min(...turnIds);
const endTurnId = Math.max(...turnIds);

const event: SummaryEvent = {
type: 'summary',
uuid: randomUUID(),
replaces: replacedUuids,
startTurnId,
endTurnId,
summaryText: summary,
lastSummarizedTurnId: lastTurnId,
timestamp: new Date().toISOString(),
};
appendLine(resolveSessionJsonlPath(sessionId), event);

Expand All @@ -301,7 +310,7 @@

if (!existingSummary) return inRange;

const lastTurn = existingSummary.lastSummarizedTurnId ?? 0;
const lastTurn = existingSummary.endTurnId ?? 0;
return inRange.filter((e) => 'turnId' in e && (e as any).turnId > lastTurn);
}

Expand Down
1 change: 1 addition & 0 deletions packages/codingcode/src/context/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ export interface CompressResult {
didCompress: boolean;
released: number;
promptEstimate: number;
messages?: Message[];
}
2 changes: 1 addition & 1 deletion packages/codingcode/src/memory/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ export class MemoryService extends Effect.Service<MemoryService>()('Memory', {
if (!projectAuto) return '';

const stripped = stripMarkersForPrompt(projectAuto);
const truncated = truncateForPrompt(stripped, PROMPT_MAX_BYTES);
const truncated = truncateForPrompt(stripped, cfg.promptMaxBytes);

return truncated ? `## Long-term Memory\n\n${truncated}` : '';
}
Expand Down
28 changes: 26 additions & 2 deletions packages/codingcode/src/server/routes/sessions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import { ContextService } from '../../context/service.js';
import { getContextConfig } from '../../context/config.js';
import { CheckpointService } from '../../checkpoint/checkpoint-service.js';
import { WorkspaceService } from '../../core/workspace.js';
import { LLMFactoryService } from '../../llm/factory.js';
import type { LLMClient } from '../../llm/client.js';
import { errorResponse } from '../util.js';

type ManagedRt = ManagedRuntime.ManagedRuntime<any, any>;
Expand Down Expand Up @@ -136,9 +138,31 @@ export function createSessionsRouter(rt: ManagedRt): Hono {
const result = await runWithLayer(
Effect.gen(function* () {
const context = yield* ContextService;
const state = yield* (yield* SessionService).create(normalizedCwd, 'unknown', sessionId);
const factory = yield* LLMFactoryService;
const session = yield* SessionService;
const state = yield* session.create(normalizedCwd, 'unknown', sessionId);

let llm: LLMClient | null = null;
const entry = yield* factory.getActiveEntry().pipe(Effect.either);
if (entry._tag === 'Right') {
const client = yield* factory.createClient(entry.right).pipe(Effect.either);
if (client._tag === 'Right') llm = client.right;
}

const { messages } = context.assemblePayload(
state.sessionId,
state.projectPath,
getContextConfig()
);

return yield* Effect.promise(() =>
context.compactWithLLM(state.sessionId, state.projectPath, getContextConfig(), null)
context.compactWithLLM(
state.sessionId,
state.projectPath,
messages,
getContextConfig(),
llm
)
);
})
);
Expand Down
Loading
Loading