From 74ff07f15ba6eaf35464ac44c2d025f1b05e7aa8 Mon Sep 17 00:00:00 2001 From: 7Sageer <7sageer@djwcb.cn> Date: Thu, 25 Jun 2026 20:15:17 +0800 Subject: [PATCH 01/16] feat(agent-core): rework compaction to keep only user prompts and summary Compact the whole history, keeping only real user prompts within a 20k token budget followed by a user-role summary prefixed with SUMMARY_PREFIX. Replace the compaction prompt with SUMMARIZATION_PROMPT, trigger auto-compaction at 90% of the context window, and drop assistant/tool messages and deferred injections on compaction. --- .changeset/rework-compaction-strategy.md | 5 + .../compaction/compaction-instruction.md | 74 +-- .../compaction/compaction-summary-prefix.md | 1 + .../agent-core/src/agent/compaction/full.ts | 116 ++-- .../agent-core/src/agent/compaction/index.ts | 1 + .../src/agent/compaction/memento.ts | 113 ++++ .../src/agent/compaction/strategy.ts | 177 +---- .../agent-core/src/agent/context/index.ts | 20 +- .../src/services/message/transcript.ts | 23 +- .../test/agent/compaction/full.test.ts | 608 ++++++------------ .../test/agent/compaction/memento.test.ts | 99 +++ .../test/agent/compaction/micro.test.ts | 8 +- .../test/agent/compaction/strategy.test.ts | 208 ++---- .../agent-core/test/agent/context.test.ts | 46 +- packages/agent-core/test/agent/resume.test.ts | 9 +- .../test/prompt-placeholders.test.ts | 1 - .../test/services/message-transcript.test.ts | 42 +- packages/server/test/sessions.e2e.test.ts | 2 +- .../server/test/snapshotService.unit.test.ts | 6 +- 19 files changed, 624 insertions(+), 935 deletions(-) create mode 100644 .changeset/rework-compaction-strategy.md create mode 100644 packages/agent-core/src/agent/compaction/compaction-summary-prefix.md create mode 100644 packages/agent-core/src/agent/compaction/memento.ts create mode 100644 packages/agent-core/test/agent/compaction/memento.test.ts diff --git a/.changeset/rework-compaction-strategy.md b/.changeset/rework-compaction-strategy.md new file mode 100644 index 000000000..6b42303d2 --- /dev/null +++ b/.changeset/rework-compaction-strategy.md @@ -0,0 +1,5 @@ +--- +"@moonshot-ai/kimi-code": minor +--- + +Rework conversation compaction to keep only real user prompts followed by a user-role summary, dropping assistant and tool messages. diff --git a/packages/agent-core/src/agent/compaction/compaction-instruction.md b/packages/agent-core/src/agent/compaction/compaction-instruction.md index 49b0d80b4..42fae605d 100644 --- a/packages/agent-core/src/agent/compaction/compaction-instruction.md +++ b/packages/agent-core/src/agent/compaction/compaction-instruction.md @@ -1,69 +1,9 @@ +You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task. ---- This message is a direct task, not part of the above conversation --- +Include: +- Current progress and key decisions made +- Important context, constraints, or user preferences +- What remains to be done (clear next steps) +- Any critical data, examples, or references needed to continue -You are now given a task to compact this conversation context according to specific priorities and output requirements. - -Output text only. DO NOT CALL ANY TOOLS. Calling tools will be rejected and fails the task. You already have all the information you need in the conversation history. You have only one chance. - -The goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared work. - -{{ customInstruction }} - - - -1. **Current Task State**: What is being worked on RIGHT NOW -2. **Errors & Solutions**: All encountered errors and their resolutions -3. **Code Evolution**: Final working versions only (remove intermediate attempts) -4. **System Context**: Project structure, dependencies, environment setup -5. **Design Decisions**: Architectural choices and their rationale -6. **TODO Items**: Unfinished tasks and known issues - - - -## Current Focus - -[What we're working on now] - -## Environment - -- [Key setup/config points] -- ... - -## Completed Tasks - -- [Task]: [Brief outcome] -- ... - -## Active Issues - -- [Issue]: [Status/Next steps] -- ... - -## Code State - -### [Critical file name] - -[Brief description of the file's purpose and current state] - -``` -[The latest version of critical code snippets in this file, <20 lines] -``` - -### [Critical file name] - -- [Useful classes/methods/functions]: [Brief description/usage] -- ... - - - -## Important Context - -- [Any crucial information not covered above] -- ... - -## All User Messages - -- [Detailed non tool use user message] -- ... - - +Be concise, structured, and focused on helping the next LLM seamlessly continue the work. diff --git a/packages/agent-core/src/agent/compaction/compaction-summary-prefix.md b/packages/agent-core/src/agent/compaction/compaction-summary-prefix.md new file mode 100644 index 000000000..62a7161b8 --- /dev/null +++ b/packages/agent-core/src/agent/compaction/compaction-summary-prefix.md @@ -0,0 +1 @@ +Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis: \ No newline at end of file diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts index 2d608bae5..5380181dc 100644 --- a/packages/agent-core/src/agent/compaction/full.ts +++ b/packages/agent-core/src/agent/compaction/full.ts @@ -19,7 +19,6 @@ import { retryBackoffDelays, sleepForRetry, } from '../../loop/retry'; -import { renderPrompt } from '../../utils/render-prompt'; import { estimateTokens, estimateTokensForMessages, @@ -29,13 +28,18 @@ import { resolveCompletionBudget, } from '../../utils/completion-budget'; import compactionInstructionTemplate from './compaction-instruction.md?raw'; -import { renderTodoList, type TodoItem } from '../../tools/builtin/state/todo-list'; import type { CompactionBeginData, CompactionResult } from './types'; import { DEFAULT_COMPACTION_CONFIG, DefaultCompactionStrategy, type CompactionStrategy, } from './strategy'; +import { + COMPACT_USER_MESSAGE_MAX_TOKENS, + buildCompactionSummaryText, + collectCompactableUserMessages, + selectRecentUserMessages, +} from './memento'; export const MAX_COMPACTION_RETRY_ATTEMPTS = 5; @@ -68,7 +72,7 @@ export class FullCompaction { reservedContextSize: agent.kimiConfig?.loopControl?.reservedContextSize ?? DEFAULT_COMPACTION_CONFIG.reservedContextSize, - } + }, ); } @@ -91,9 +95,8 @@ export class FullCompaction { }); return; } - const compactedCount = this.strategy.computeCompactCount(this.agent.context.history, data.source); - if (compactedCount === 0) { - throw new KimiError(ErrorCodes.COMPACTION_UNABLE, 'No prefix that can be compacted in current history.'); + if (this.agent.context.history.length === 0) { + throw new KimiError(ErrorCodes.COMPACTION_UNABLE, 'No messages to compact in current history.'); } this.agent.records.logRecord({ type: 'full_compaction.begin', @@ -107,7 +110,7 @@ export class FullCompaction { const abortController = new AbortController(); this.compacting = { abortController, - promise: this.compactionWorker(abortController.signal, data, compactedCount), + promise: this.compactionWorker(abortController.signal, data), blockedByTurn: false, }; } @@ -202,34 +205,14 @@ export class FullCompaction { private async compactionWorker( signal: AbortSignal, data: Readonly, - compactedCount: number, ): Promise { try { - const finalResult = { - summary: '', - compactedCount: 1, - tokensBefore: 0, - tokensAfter: 0, - }; - - for (let round = 1; ; round++) { - const result = await this.compactionRound(round, signal, data, compactedCount); - if (!result) return; - - finalResult.summary = result.summary; - finalResult.compactedCount += result.compactedCount - 1; - finalResult.tokensBefore += result.tokensBefore - finalResult.tokensAfter; - finalResult.tokensAfter = result.tokensAfter; - - if (result.tokensBefore - result.tokensAfter < 1024) break; - if (!this.strategy.shouldBlock(result.tokensAfter)) break; - compactedCount = this.strategy.computeCompactCount(this.agent.context.history, data.source); - if (compactedCount === 0) break; - } + const result = await this.compactionRound(signal, data); + if (!result) return; this.markCompleted(); - this.agent.emitEvent({ type: 'compaction.completed', result: finalResult }); + this.agent.emitEvent({ type: 'compaction.completed', result }); await this.agent.injection.injectGoal(); - this.triggerPostCompactHook(data, finalResult); + this.triggerPostCompactHook(data, result); } catch (error) { if (isAbortError(error)) return; const blockedByTurn = this.compacting?.blockedByTurn === true; @@ -245,19 +228,23 @@ export class FullCompaction { } } + private buildInstruction(customInstruction: string | undefined): string { + const base = compactionInstructionTemplate.trimEnd(); + if (customInstruction === undefined || customInstruction.trim().length === 0) { + return base; + } + return `${base}\n\n${customInstruction}`; + } + private async compactionRound( - round: number, signal: AbortSignal, data: Readonly, - initialCompactedCount: number, - ) { + ): Promise { const startedAt = Date.now(); const originalHistory = [...this.agent.context.history]; const tokensBefore = estimateTokensForMessages(originalHistory); let retryCount = 0; try { - let compactedCount = initialCompactedCount; - await this.triggerPreCompactHook(data, tokensBefore, signal); const model = this.agent.config.model; @@ -268,15 +255,20 @@ export class FullCompaction { }), capability: this.agent.config.modelCapabilities, }); + const instruction = this.buildInstruction(data.instruction); const delays = retryBackoffDelays(MAX_COMPACTION_RETRY_ATTEMPTS); - let usage: TokenUsage | null; - let summary: string; + let usage: TokenUsage | null = null; + let summary: string | undefined; + // Compact the whole history, dropping the oldest item on overflow to + // preserve the prefix-cache-friendly tail. `historyForModel` is the + // (possibly trimmed) view sent to the model; the summary is always built + // from the untouched `originalHistory`. + let historyForModel = originalHistory; while (true) { - const messagesToCompact = originalHistory.slice(0, compactedCount); const messages = [ - ...this.agent.context.project(messagesToCompact), - createUserMessage(renderPrompt(compactionInstructionTemplate, { customInstruction: data.instruction ?? '' })), + ...this.agent.context.project(historyForModel), + createUserMessage(instruction), ]; try { const response = await this.agent.generate( @@ -294,14 +286,16 @@ export class FullCompaction { summary = extractCompactionSummary(response); break; } catch (error) { - if ( + const isOverflow = error instanceof APIContextOverflowError || error instanceof CompactionTruncatedError || - error instanceof APIEmptyResponseError // e.g. think-only - ) { - compactedCount = this.strategy.reduceCompactOnOverflow(messagesToCompact); + error instanceof APIEmptyResponseError; + if (isOverflow && historyForModel.length > 1) { + historyForModel = historyForModel.slice(1); + retryCount = 0; + continue; } - else if (!isRetryableGenerateError(error)) { + if (!isRetryableGenerateError(error)) { throw error; } if (retryCount + 1 >= MAX_COMPACTION_RETRY_ATTEMPTS) { @@ -325,14 +319,16 @@ export class FullCompaction { } } - summary = this.postProcessSummary(summary); - - const recent = originalHistory.slice(compactedCount); - const tokensAfter = estimateTokens(summary) + estimateTokensForMessages(recent); + const summaryText = buildCompactionSummaryText(summary ?? ''); + const keptUserMessages = selectRecentUserMessages( + collectCompactableUserMessages(originalHistory), + COMPACT_USER_MESSAGE_MAX_TOKENS, + ); + const tokensAfter = estimateTokens(summaryText) + estimateTokensForMessages(keptUserMessages); const result: CompactionResult = { - summary, - compactedCount, + summary: summaryText, + compactedCount: originalHistory.length, tokensBefore, tokensAfter, }; @@ -343,7 +339,7 @@ export class FullCompaction { duration: Date.now() - startedAt, compactedCount: result.compactedCount, retryCount, - round, + round: 1, thinkingLevel: this.agent.config.thinkingLevel, ...usage, ...data, @@ -351,12 +347,12 @@ export class FullCompaction { this.agent.context.applyCompaction(result); return result; } catch (error) { - if (isAbortError(error)) return; + if (isAbortError(error)) return undefined; this.agent.telemetry.track('compaction_failed', { ...data, tokensBefore, duration: Date.now() - startedAt, - round, + round: 1, retryCount, thinkingLevel: this.agent.config.thinkingLevel, errorType: error instanceof Error ? error.name : 'Unknown', @@ -395,16 +391,6 @@ export class FullCompaction { }, }); } - - private postProcessSummary(summary: string): string { - const storeData = this.agent.tools.storeData(); - const todos = (storeData['todo'] as readonly TodoItem[] | undefined) ?? []; - if (todos.length === 0) { - return summary; - } - const todoMarkdown = renderTodoList(todos, '## TODO List'); - return `${summary.trim()}\n\n${todoMarkdown}`; - } } function extractCompactionSummary(response: GenerateResult): string { diff --git a/packages/agent-core/src/agent/compaction/index.ts b/packages/agent-core/src/agent/compaction/index.ts index 4f92ac9fe..4e209f83b 100644 --- a/packages/agent-core/src/agent/compaction/index.ts +++ b/packages/agent-core/src/agent/compaction/index.ts @@ -2,3 +2,4 @@ export * from './full'; export * from './micro'; export * from './strategy'; export * from './types'; +export * from './memento'; diff --git a/packages/agent-core/src/agent/compaction/memento.ts b/packages/agent-core/src/agent/compaction/memento.ts new file mode 100644 index 000000000..2af6a0abf --- /dev/null +++ b/packages/agent-core/src/agent/compaction/memento.ts @@ -0,0 +1,113 @@ +import { estimateTokens, estimateTokensForMessage } from '../../utils/tokens'; +import summaryPrefixTemplate from './compaction-summary-prefix.md?raw'; + +/** + * "Memento" compaction helpers. + * + * Compaction rewrites the model context as: the most recent user messages + * (verbatim, within a token budget) followed by a single user-role summary + * that is prefixed with `COMPACTION_SUMMARY_PREFIX`. Assistant messages, + * tool calls, and tool results are dropped. These helpers apply the exact + * same rule for both the live context rewrite and the transcript reducer. + */ + +export const COMPACTION_SUMMARY_PREFIX = summaryPrefixTemplate.trimEnd(); +export const COMPACT_USER_MESSAGE_MAX_TOKENS = 20_000; + +interface ContentPartLike { + readonly type: string; + readonly text?: string; +} + +interface MessageLike { + readonly role: string; + readonly content: readonly ContentPartLike[]; + readonly origin?: { readonly kind: string; readonly trigger?: string } | undefined; +} + +function extractText(content: readonly ContentPartLike[]): string { + let text = ''; + for (const part of content) { + if (part.type === 'text' && typeof part.text === 'string') { + text += part.text; + } + } + return text; +} + +export function isCompactionSummaryMessage(message: MessageLike): boolean { + if (message.origin?.kind === 'compaction_summary') return true; + return extractText(message.content).startsWith(`${COMPACTION_SUMMARY_PREFIX}\n`); +} + +/** + * Keep only genuine user input (real user prompts and user-slash skill + * activations). Injections (system reminders, plan-mode reminders), + * background-task notifications, system triggers, cron/hook/retry messages, + * and previous compaction summaries are excluded — they are either + * re-injected each turn or ephemeral, since initial context is rebuilt + * every turn. + */ +export function isRealUserInput(message: MessageLike): boolean { + if (message.role !== 'user') return false; + const origin = message.origin; + if (origin === undefined || origin.kind === 'user') return true; + if (origin.kind === 'skill_activation') return origin.trigger === 'user-slash'; + return false; +} + +export function collectCompactableUserMessages(messages: readonly T[]): T[] { + return messages.filter( + (message) => isRealUserInput(message) && !isCompactionSummaryMessage(message), + ); +} + +function truncateTextToTokens(text: string, maxTokens: number): string { + if (maxTokens <= 0) return ''; + if (estimateTokens(text) <= maxTokens) return text; + let end = Math.min(text.length, maxTokens * 4); + while (end > 0 && estimateTokens(text.slice(0, end)) > maxTokens) { + end--; + } + return text.slice(0, end); +} + +function truncateUserMessage(message: T, maxTokens: number): T { + const text = truncateTextToTokens(extractText(message.content), maxTokens); + return { + ...message, + content: [{ type: 'text', text }], + toolCalls: [], + } as unknown as T; +} + +/** + * Keep the most recent user messages whose cumulative estimated size fits + * `maxTokens`. The oldest kept message is truncated to the remaining budget + * when it would otherwise overflow; older messages are dropped. + */ +export function selectRecentUserMessages( + messages: readonly T[], + maxTokens: number = COMPACT_USER_MESSAGE_MAX_TOKENS, +): T[] { + const selected: T[] = []; + let remaining = maxTokens; + for (let i = messages.length - 1; i >= 0 && remaining > 0; i--) { + const message = messages[i]!; + const tokens = estimateTokensForMessage(message as never); + if (tokens <= remaining) { + selected.push(message); + remaining -= tokens; + } else { + selected.push(truncateUserMessage(message, remaining)); + break; + } + } + selected.reverse(); + return selected; +} + +export function buildCompactionSummaryText(summary: string): string { + const suffix = summary.trim(); + return `${COMPACTION_SUMMARY_PREFIX}\n${suffix.length > 0 ? suffix : '(no summary available)'}`; +} diff --git a/packages/agent-core/src/agent/compaction/strategy.ts b/packages/agent-core/src/agent/compaction/strategy.ts index edf9132e0..faff72c0f 100644 --- a/packages/agent-core/src/agent/compaction/strategy.ts +++ b/packages/agent-core/src/agent/compaction/strategy.ts @@ -1,34 +1,31 @@ -import type { Message } from "@moonshot-ai/kosong"; -import { estimateTokensForMessage } from "../../utils/tokens"; -import type { CompactionSource } from "./types"; +import type { CompactionSource } from './types'; export interface CompactionConfig { + /** Fraction of the model context window that triggers auto-compaction. */ triggerRatio: number; + /** Fraction of the model context window that blocks the turn on compaction. */ blockRatio: number; + /** Reserved output budget; compaction triggers early to leave this much room. */ reservedContextSize: number; + /** Maximum number of auto-compactions allowed in a single turn. */ maxCompactionPerTurn: number; - maxRecentMessages: number; - maxRecentUserMessages: number; - maxRecentSizeRatio: number; - minOverflowReductionRatio: number; } +/** + * Auto-compact at 90% of the resolved context window. `blockRatio` matches + * `triggerRatio` so compaction runs synchronously with no background + * compaction. + */ export const DEFAULT_COMPACTION_CONFIG: CompactionConfig = { - triggerRatio: 0.85, - blockRatio: 0.85, // Same as triggerRatio to disable async compaction + triggerRatio: 0.9, + blockRatio: 0.9, reservedContextSize: 50_000, maxCompactionPerTurn: Infinity, - maxRecentMessages: 4, - maxRecentUserMessages: Infinity, - maxRecentSizeRatio: 0.2, - minOverflowReductionRatio: 0.05, }; export interface CompactionStrategy { shouldCompact(usedSize: number): boolean; shouldBlock(usedSize: number): boolean; - computeCompactCount(messages: readonly Message[], source: CompactionSource): number; - reduceCompactOnOverflow(messages: readonly Message[]): number; readonly checkAfterStep: boolean; readonly maxCompactionPerTurn: number; } @@ -36,8 +33,8 @@ export interface CompactionStrategy { export class DefaultCompactionStrategy implements CompactionStrategy { constructor( protected readonly maxSizeProvider: () => number, - protected readonly config: CompactionConfig = DEFAULT_COMPACTION_CONFIG - ) { } + protected readonly config: CompactionConfig = DEFAULT_COMPACTION_CONFIG, + ) {} protected get maxSize(): number { return this.maxSizeProvider(); @@ -64,111 +61,6 @@ export class DefaultCompactionStrategy implements CompactionStrategy { return reservedSize > 0 && reservedSize < this.maxSize && usedSize + reservedSize >= this.maxSize; } - computeCompactCount(messages: readonly Message[], source: CompactionSource): number { - // Return value: N messages to be compacted (0 means no compaction possible) - // LLM Input: messages.slice(0, N) + [user:instruction] - // Preserved recent messages: messages.slice(N) - - // Manual compaction - if (source === 'manual') { - for (let i = messages.length - 1; i > 0; i--) { - if (canSplitAfter(messages, i)) { - return this.fitCompactCountToWindow(messages, i + 1); - } - } - return 0; - } - - // Auto compaction rules (in order of precedence): - // 1. The split after messages[N-1] must be safe per `canSplitAfter`: - // messages[N-1] is not a user or asst-with-tool-calls, and the retained - // suffix messages.slice(N) has no orphan tool result. - // 2. At least one recent message must be preserved - // 3. At most maxRecentMessages recent messages should be preserved - // 4. At most maxRecentUserMessages recent user messages should be preserved - // 5. At most maxRecentSizeRatio * maxSize recent messages should be preserved - // 6. N should be as small as possible - - let recentMessages = 1; - let recentUserMessages = 0; - let recentSize = 0; - let bestN: number | undefined; - - for (; recentMessages < messages.length; recentMessages++) { - const splitIndex = messages.length - recentMessages - 1; - const m2 = messages[messages.length - recentMessages]!; - - if (m2.role === 'user') { - recentUserMessages++; - } - recentSize += estimateTokensForMessage(m2); - - if (canSplitAfter(messages, splitIndex)) { - bestN = splitIndex + 1; - } - - const reachesMax = recentMessages >= this.config.maxRecentMessages - || recentUserMessages >= this.config.maxRecentUserMessages - || recentSize >= this.maxSize * this.config.maxRecentSizeRatio; - if (reachesMax && bestN !== undefined) { - break; - } - } - - return this.fitCompactCountToWindow(messages, bestN ?? 0); - } - - reduceCompactOnOverflow(messages: readonly Message[]): number { - const minReducedSize = Math.max( - 1, - Math.ceil(this.maxSize * this.config.minOverflowReductionRatio), - ); - let reducedSize = 0; - let bestN: number | undefined; - - for (let i = messages.length - 2; i > 0; i--) { - reducedSize += estimateTokensForMessage(messages[i + 1]!); - if (canSplitAfter(messages, i)) { - bestN = i + 1; - if (reducedSize >= minReducedSize) { - return i + 1; - } - } - } - return bestN ?? messages.length; - } - - private fitCompactCountToWindow( - messages: readonly Message[], - compactedCount: number, - ): number { - if (this.maxSize <= 0 || compactedCount <= 0) { - return compactedCount; - } - - let compactedSize = 0; - for (let i = 0; i < compactedCount; i++) { - compactedSize += estimateTokensForMessage(messages[i]!); - } - if (compactedSize <= this.maxSize) { - return compactedCount; - } - - let bestN: number | undefined; - for (let n = compactedCount - 1; n > 0; n--) { - compactedSize -= estimateTokensForMessage(messages[n]!); - if (!canSplitAfter(messages, n - 1)) { - continue; - } - bestN = n; - if (compactedSize <= this.maxSize) { - return n; - } - } - - return bestN ?? compactedCount; - } - get checkAfterStep(): boolean { return this.config.triggerRatio !== this.config.blockRatio; } @@ -178,43 +70,4 @@ export class DefaultCompactionStrategy implements CompactionStrategy { } } -/** - * Decide whether a compaction split is safe to place immediately after - * `messages[index]`. A split is safe only when: - * - `messages[index]` itself is not a user message or an assistant message - * with pending tool calls (cutting either of those off from what follows - * would break the conversation), AND - * - the next message is not a tool result. The history is well-formed: - * tool results only appear after their owning `asst_w_tc` and all tool - * results for one exchange land consecutively before the next non-tool - * message. So if the suffix starts with a tool result, its `asst_w_tc` - * must be in the compacted prefix, which would orphan that result - * (e.g. splitting between tool_a and tool_b of a parallel call), AND - * - the compacted prefix itself does not end with an unresolved tool - * exchange, because pending tool results must remain in the retained tail. - */ -function canSplitAfter(messages: readonly Message[], index: number): boolean { - const m = messages[index]; - if (m === undefined) return false; - if (m.role === 'user') return false; - if (m.role === 'assistant' && m.toolCalls.length > 0) return false; - if (messages[index + 1]?.role === 'tool') return false; - if (prefixEndsWithOpenToolExchange(messages, index)) return false; - return true; -} - -function prefixEndsWithOpenToolExchange(messages: readonly Message[], index: number): boolean { - if (messages[index]?.role !== 'tool') return false; - - let toolResultCount = 0; - for (let i = index; i >= 0; i--) { - const message = messages[i]; - if (message === undefined) return false; - if (message.role === 'tool') { - toolResultCount++; - continue; - } - return message.role === 'assistant' && message.toolCalls.length > toolResultCount; - } - return false; -} +export type { CompactionSource }; diff --git a/packages/agent-core/src/agent/context/index.ts b/packages/agent-core/src/agent/context/index.ts index 88edda53f..5d426aa9b 100644 --- a/packages/agent-core/src/agent/context/index.ts +++ b/packages/agent-core/src/agent/context/index.ts @@ -4,7 +4,12 @@ import type { Agent } from '..'; import { ErrorCodes, KimiError } from '../../errors'; import type { ExecutableToolResult, LoopRecordedEvent } from '../../loop'; import { estimateTokensForMessages } from '../../utils/tokens'; -import type { CompactionResult } from '../compaction'; +import { + COMPACT_USER_MESSAGE_MAX_TOKENS, + collectCompactableUserMessages, + selectRecentUserMessages, + type CompactionResult, +} from '../compaction'; import { project, trimTrailingOpenToolExchange } from './projector'; import { USER_PROMPT_ORIGIN, @@ -175,17 +180,24 @@ export class ContextMemory { tokensAfter: result.tokensAfter, }, }); + const keptUserMessages = selectRecentUserMessages( + collectCompactableUserMessages(this._history), + COMPACT_USER_MESSAGE_MAX_TOKENS, + ); this._history = [ + ...keptUserMessages, { - role: 'assistant', + role: 'user', content: [{ type: 'text', text: result.summary }], toolCalls: [], origin: { kind: 'compaction_summary' }, }, - ...this._history.slice(result.compactedCount), ]; this.openSteps.clear(); - this.flushDeferredMessagesIfToolExchangeClosed(); + this.pendingToolResultIds.clear(); + // Drop deferred messages (mostly injections/system reminders) instead of + // flushing them: initial context is rebuilt every turn. + this.deferredMessages = []; this._tokenCount = result.tokensAfter; this.tokenCountCoveredMessageCount = this._history.length; this.agent.microCompaction.reset(); diff --git a/packages/agent-core/src/services/message/transcript.ts b/packages/agent-core/src/services/message/transcript.ts index e98bed516..771dc16cc 100644 --- a/packages/agent-core/src/services/message/transcript.ts +++ b/packages/agent-core/src/services/message/transcript.ts @@ -45,6 +45,11 @@ import path from 'node:path'; import type { AgentRecord } from '../../agent/records'; import type { ContextMessage } from '../../agent/context'; import type { ExecutableToolResult, LoopRecordedEvent } from '../../loop'; +import { + COMPACT_USER_MESSAGE_MAX_TOKENS, + collectCompactableUserMessages, + selectRecentUserMessages, +} from '../../agent/compaction'; type ContentPart = ContextMessage['content'][number]; @@ -238,20 +243,24 @@ export function reduceWireRecords(records: Iterable): { applyLoopEvent(record.event, record.time); break; case 'context.apply_compaction': { - // ContextMemory drops history[0..compactedCount] and prepends the - // summary; we keep the prefix and insert the summary at the fold - // point so the transcript shows both. - const tailLength = Math.max(0, foldedLength - record.compactedCount); - transcript.splice(Math.max(0, transcript.length - tailLength), 0, { + // Mirrors ContextMemory.applyCompaction: the live context becomes the + // most recent user messages followed by a user-role summary. The + // transcript keeps the full history and appends the summary marker; + // foldedLength tracks the post-compaction live context length. + const keptUserMessages = selectRecentUserMessages( + collectCompactableUserMessages(transcript.map((entry) => entry.message)), + COMPACT_USER_MESSAGE_MAX_TOKENS, + ); + transcript.push({ message: { - role: 'assistant', + role: 'user', content: [{ type: 'text', text: record.summary }], toolCalls: [], origin: { kind: 'compaction_summary' }, }, time: record.time, }); - foldedLength = tailLength + 1; + foldedLength = keptUserMessages.length + 1; openSteps.clear(); flushDeferredIfToolExchangeClosed(); break; diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts index 85da0ed46..1703d645b 100644 --- a/packages/agent-core/test/agent/compaction/full.test.ts +++ b/packages/agent-core/test/agent/compaction/full.test.ts @@ -17,7 +17,11 @@ import { import { afterEach, describe, expect, it, vi } from 'vitest'; import type { AgentOptions } from '../../../src/agent'; -import { DefaultCompactionStrategy, type CompactionStrategy } from '../../../src/agent/compaction'; +import { + COMPACTION_SUMMARY_PREFIX, + DefaultCompactionStrategy, + type CompactionStrategy, +} from '../../../src/agent/compaction'; import { FLAG_DEFINITIONS, MASTER_ENV } from '../../../src/flags'; import { HookEngine, type HookEngineTriggerArgs } from '../../../src/session/hooks'; import { estimateTokensForMessages } from '../../../src/utils/tokens'; @@ -43,138 +47,6 @@ const CATALOGUED_MODEL_CAPABILITIES = { const MICRO_COMPACTION_FLAG_ENV = getMicroCompactionFlagEnv(); describe('FullCompaction', () => { - it('keeps an oversized trailing user message as recent', () => { - const strategy = testCompactionStrategy(); - const messages = [ - textMessage('user', 'old user'), - textMessage('assistant', 'old assistant'), - textMessage('user', `pending user ${'x'.repeat(1_200)}`), - ]; - - expect(strategy.computeCompactCount(messages, 'auto')).toBe(2); - }); - - it('keeps consecutive trailing user messages as recent', () => { - const strategy = testCompactionStrategy(); - const messages = [ - textMessage('user', 'old user'), - textMessage('assistant', 'old assistant'), - textMessage('user', `pending user one ${'x'.repeat(1_200)}`), - textMessage('user', `pending user two ${'x'.repeat(1_200)}`), - ]; - - expect(strategy.computeCompactCount(messages, 'auto')).toBe(2); - }); - - it('compacts the prefix when the trailing exchange itself is oversized', () => { - const strategy = testCompactionStrategy(); - const messages = [ - textMessage('user', 'old user'), - textMessage('assistant', 'old assistant'), - textMessage('user', 'recent user'), - textMessage('assistant', `recent assistant ${'x'.repeat(1_200)}`), - ]; - - expect(strategy.computeCompactCount(messages, 'auto')).toBe(2); - }); - - it('returns 0 when there is nothing to compact', () => { - const strategy = testCompactionStrategy(); - expect(strategy.computeCompactCount([], 'auto')).toBe(0); - expect(strategy.computeCompactCount([textMessage('user', 'only pending')], 'auto')).toBe(0); - expect( - strategy.computeCompactCount( - [ - textMessage('user', 'a'), - textMessage('user', 'b'), - textMessage('user', 'c'), - ], - 'auto', - ), - ).toBe(0); - }); - - it('returns 0 when no intermediate split exists and the last message is also unsplittable', () => { - const strategy = testCompactionStrategy(); - const messages: Message[] = [ - textMessage('user', 'inspect'), - { - role: 'assistant', - content: [], - toolCalls: [{ type: 'function', id: 'call_a', name: 'Lookup', arguments: '{}' }], - }, - ]; - - expect(strategy.computeCompactCount(messages, 'auto')).toBe(0); - }); - - it('does not split inside a parallel tool exchange', () => { - const strategy = testCompactionStrategy(); - const messages: Message[] = [ - textMessage('user', 'old user'), - textMessage('assistant', 'old assistant'), - textMessage('user', 'run both tools'), - { - role: 'assistant', - content: [], - toolCalls: [ - { type: 'function', id: 'call_a', name: 'Lookup', arguments: '{}' }, - { type: 'function', id: 'call_b', name: 'Lookup', arguments: '{}' }, - ], - }, - { role: 'tool', content: [{ type: 'text', text: 'a' }], toolCalls: [], toolCallId: 'call_a' }, - { role: 'tool', content: [{ type: 'text', text: 'b' }], toolCalls: [], toolCallId: 'call_b' }, - textMessage('user', 'next prompt'), - ]; - - // The only valid split is before the parallel exchange (after 'old assistant'), - // never between tool_a and tool_b — that would leave tool_b as an orphan. - expect(strategy.computeCompactCount(messages, 'auto')).toBe(2); - }); - - it('reserves response context by default before the ratio threshold is reached', () => { - const strategy = new DefaultCompactionStrategy(() => 256_000); - - expect(strategy.shouldCompact(210_000)).toBe(true); - expect(strategy.shouldBlock(210_000)).toBe(true); - }); - - it('backs off overflow compaction by at least five percent of the context window', () => { - const strategy = testCompactionStrategy(1_000); - const messages = [ - textMessage('user', 'old user'), - textMessage('assistant', 'old assistant'), - ...Array.from({ length: 20 }, () => [ - textMessage('user', 'continue'), - textMessage('assistant', ''), - ]).flat(), - ]; - - const reduced = strategy.reduceCompactOnOverflow(messages); - const removed = messages.slice(reduced); - - expect(reduced).toBeGreaterThan(0); - expect(estimateTokensForMessages(removed)).toBeGreaterThanOrEqual(50); - }); - - it('ignores reserved context when the reserve is not smaller than the model window', () => { - const strategy = new DefaultCompactionStrategy(() => 32_000, { - triggerRatio: 0.85, - blockRatio: 0.85, - reservedContextSize: 50_000, - maxCompactionPerTurn: 3, - maxRecentMessages: 3, - maxRecentUserMessages: Infinity, - maxRecentSizeRatio: 0.2, - minOverflowReductionRatio: 0.05, - }); - - expect(strategy.shouldCompact(1)).toBe(false); - expect(strategy.shouldBlock(1)).toBe(false); - expect(strategy.shouldCompact(28_000)).toBe(true); - expect(strategy.shouldBlock(28_000)).toBe(true); - }); - it('runs manual compaction and applies the compacted context', async () => { const records: TelemetryRecord[] = []; const ctx = testAgent({ telemetry: recordingTelemetry(records) }); @@ -203,12 +75,12 @@ describe('FullCompaction', () => { [wire] context.append_message { "message": { "role": "user", "content": [ { "type": "text", "text": "recent user three" } ], "toolCalls": [], "origin": { "kind": "user" } }, "time": "