diff --git a/.changeset/compaction-internals-cleanup.md b/.changeset/compaction-internals-cleanup.md new file mode 100644 index 000000000..6d5bc7280 --- /dev/null +++ b/.changeset/compaction-internals-cleanup.md @@ -0,0 +1,5 @@ +--- +"@moonshot-ai/kimi-code": patch +--- + +Tighten compaction bookkeeping so compacted history stays consistent across retries and resume. diff --git a/.changeset/rework-compaction-strategy.md b/.changeset/rework-compaction-strategy.md new file mode 100644 index 000000000..6b42303d2 --- /dev/null +++ b/.changeset/rework-compaction-strategy.md @@ -0,0 +1,5 @@ +--- +"@moonshot-ai/kimi-code": minor +--- + +Rework conversation compaction to keep only real user prompts followed by a user-role summary, dropping assistant and tool messages. diff --git a/apps/vis/server/src/lib/context-projector.ts b/apps/vis/server/src/lib/context-projector.ts index fd7a376e6..40cd5810b 100644 --- a/apps/vis/server/src/lib/context-projector.ts +++ b/apps/vis/server/src/lib/context-projector.ts @@ -1,3 +1,9 @@ +import { + COMPACT_USER_MESSAGE_MAX_TOKENS, + collectCompactableUserMessages, + isRealUserInput, + selectRecentUserMessages, +} from '@moonshot-ai/agent-core'; import type { ContentPart, ContextMessage, @@ -234,19 +240,21 @@ export function projectContext( break; case 'context.apply_compaction': { openSteps = new Map(); - // Mirror agent-core's actual `applyCompaction` behaviour - // (`packages/agent-core/src/agent/context/index.ts`): history becomes - // `[summaryBubble, ...history.slice(compactedCount)]`. The summary is - // an *assistant* message tagged `origin.kind = 'compaction_summary'` - // (using 'system' would skew role counts and any downstream diff - // against agent-core history). The post-compaction tail is preserved - // rather than dropped, so messages still in context stay visible. 
+ // Mirror agent-core's `applyCompaction` + // (`packages/agent-core/src/agent/context/index.ts`): the live history + // becomes the most recent real user messages (verbatim, within a token + // budget) followed by a single user-role summary tagged + // `origin.kind = 'compaction_summary'`. Assistant messages, tool calls, + // and tool results are dropped. The selection rule + // (`selectRecentUserMessages` / `collectCompactableUserMessages`) is the + // same helper agent-core's `ContextMemory` and the web transcript + // reducer apply, so all three views stay in sync. const summaryBubble: ProjectedMessage = { lineNo: entry.lineNo, time: rec.time, source: 'compaction_summary', message: { - role: 'assistant', + role: 'user', content: [{ type: 'text', text: rec.summary }], toolCalls: [], origin: { kind: 'compaction_summary' }, @@ -258,34 +266,49 @@ export function projectContext( tokensAfter: rec.tokensAfter, }, }; + const modelSummaryBubble: ProjectedMessage = + rec.contextSummary === undefined + ? summaryBubble + : { + ...summaryBubble, + message: { + ...summaryBubble.message, + content: [{ type: 'text', text: rec.contextSummary }], + } as ContextMessage, + }; if (mode === 'model') { - // Drop the first `rec.compactedCount` HISTORY entries (NOT array - // entries): agent-core's `compactedCount` indexes into `_history`, - // which never contains our synthetic 'undo'/'clear' markers. Walk the - // array counting only history entries (`isHistoryEntry`) until - // `compactedCount` are passed, then slice there — any UI-only markers - // in the dropped region go with it (correct: they precede the - // compaction). With no markers this is exactly `slice(compactedCount)`. 
- let sliceAt = messages.length; - let passed = 0; - for (let i = 0; i < messages.length; i++) { - if (passed >= rec.compactedCount) { - sliceAt = i; - break; - } - if (isHistoryEntry(messages[i]!)) passed++; - } - if (passed < rec.compactedCount) sliceAt = messages.length; - messages = [summaryBubble, ...messages.slice(sliceAt)]; + // Rebuild the model's-eye view as the kept user messages + summary. + // `realUserEntries` is filtered with the exact + // `collectCompactableUserMessages` predicate so it stays aligned with + // the selection below (genuine user input only — no injections, + // system triggers, or prior summaries). `selectRecentUserMessages` + // keeps a contiguous suffix of that subsequence, with only the oldest + // kept message possibly truncated, so each kept message maps back onto + // its original ProjectedMessage wrapper (preserving line/time); we swap + // in the (possibly truncated) message object. + const historyEntries = messages.filter(isHistoryEntry); + const realUserEntries = historyEntries.filter( + (pm) => collectCompactableUserMessages([pm.message]).length === 1, + ); + const keptUserMessages = selectRecentUserMessages( + realUserEntries.map((pm) => pm.message), + COMPACT_USER_MESSAGE_MAX_TOKENS, + ); + const suffixStart = realUserEntries.length - keptUserMessages.length; + const keptEntries: ProjectedMessage[] = keptUserMessages.map((message, i) => { + const original = realUserEntries[suffixStart + i]!; + return original.message === message ? original : { ...original, message }; + }); + messages = [...keptEntries, modelSummaryBubble]; } else { // Full history: keep ALL preceding messages, just append the summary // marker inline so the compacted prefix stays visible. messages.push(summaryBubble); } // Mirror agent-core applyCompaction() → microCompaction.reset() (cutoff - // → 0): the message list is rebuilt as [summary, ...tail], so the old - // index-based cutoff no longer points at the same messages. 
(In full - // mode the blanking pass does not run, so this is a no-op there.) + // → 0): the message list is rebuilt, so the old index-based cutoff no + // longer points at the same messages. (In full mode the blanking pass + // does not run, so this is a no-op there.) microCutoff = 0; // Mirror agent-core applyCompaction() → _tokenCount = result.tokensAfter: // the live context-window fill is now the post-compaction count. Derived @@ -577,16 +600,6 @@ function isHistoryEntry(pm: ProjectedMessage): boolean { return pm.source !== 'undo' && pm.source !== 'clear'; } -/** Mirrors agent-core `isRealUserPrompt` (`agent/context/index.ts`): a message - * counts toward an undo only if it is a genuine user prompt. */ -function isRealUserPrompt(message: ContextMessage): boolean { - if (message.role !== 'user') return false; - const origin = message.origin; - if (origin === undefined || origin.kind === 'user') return true; - if (origin.kind === 'skill_activation') return origin.trigger === 'user-slash'; - return false; -} - /** Single source of truth for the `context.undo` backward walk, shared by both * projection modes. 
Mirrors agent-core `undo` (`agent/context/index.ts`): walk * from the end, skip `origin.kind === 'injection'` (those are KEPT even when @@ -612,7 +625,7 @@ function computeUndoCutoff( if (origin?.kind === 'compaction_summary') break; // stop removedMessageCount++; cutoff = i; - if (isRealUserPrompt(messages[i]!.message) && ++removedUserCount >= count) break; + if (isRealUserInput(messages[i]!.message) && ++removedUserCount >= count) break; } return { cutoff, removedMessageCount }; } diff --git a/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl b/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl index 317df60b2..9f44d9a7d 100644 --- a/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl +++ b/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl @@ -1,5 +1,6 @@ {"type":"metadata","protocol_version":"1.1","created_at":1779256791085} {"type":"config.update","cwd":"/tmp/work","profileName":"agent","systemPrompt":"You are Kimi.","time":1779256791100} {"type":"context.append_message","message":{"role":"user","content":[{"type":"text","text":"before compaction"}],"toolCalls":[]},"time":1779256800001} -{"type":"context.apply_compaction","summary":"compacted summary","compactedCount":1,"tokensBefore":100,"tokensAfter":30,"time":1779256800500} +{"type":"context.append_message","message":{"role":"assistant","content":[{"type":"text","text":"assistant reply"}],"toolCalls":[]},"time":1779256800200} +{"type":"context.apply_compaction","summary":"compacted summary","compactedCount":2,"tokensBefore":100,"tokensAfter":30,"time":1779256800500} {"type":"context.append_message","message":{"role":"user","content":[{"type":"text","text":"after compaction"}],"toolCalls":[]},"time":1779256801000} diff --git a/apps/vis/server/test/lib/context-projector.test.ts b/apps/vis/server/test/lib/context-projector.test.ts index d2a2d3f4c..fa21b0789 100644 --- 
a/apps/vis/server/test/lib/context-projector.test.ts +++ b/apps/vis/server/test/lib/context-projector.test.ts @@ -262,33 +262,100 @@ describe('context-projector', () => { { lineNo: 4, data: { type: 'context.append_message' as const, message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'new' }], toolCalls: [] } }, raw: {} }, ]; const proj = projectContext(entries as any); - expect(proj.messages[0]!.source).toBe('compaction_summary'); - // Compaction summary is an assistant message (agent-core's own + // Model view: the kept user prompt + user-role summary + the new prompt. + expect(proj.messages.map((m) => m.source)).toEqual([ + 'append_message', 'compaction_summary', 'append_message', + ]); + expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'old' }); + // The compaction summary is a user message (agent-core's own // representation), not a synthetic system message. - expect(proj.messages[0]!.message.role).toBe('assistant'); - expect(proj.messages[0]!.message.origin).toEqual({ kind: 'compaction_summary' }); - expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'old stuff' }); - expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'new' }); + expect(proj.messages[1]!.message.role).toBe('user'); + expect(proj.messages[1]!.message.origin).toEqual({ kind: 'compaction_summary' }); + expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'old stuff' }); + expect(proj.messages[2]!.message.content[0]).toMatchObject({ text: 'new' }); + }); + + it('uses contextSummary only for the model view and raw summary for full history', () => { + const entries = [ + { lineNo: 1, data: { type: 'context.append_message' as const, + message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'old' }], toolCalls: [] } }, raw: {} }, + { lineNo: 2, data: { type: 'context.apply_compaction' as const, + summary: 'raw summary', contextSummary: 'prefixed summary', compactedCount: 1, tokensBefore: 100, 
tokensAfter: 10 }, raw: {} }, + ]; + + const model = projectContext(entries as any); + expect(model.messages.map((m) => m.message.content[0])).toMatchObject([ + { text: 'old' }, + { text: 'prefixed summary' }, + ]); + + const full = projectContext(entries as any, 'full'); + expect(full.messages.map((m) => m.message.content[0])).toMatchObject([ + { text: 'old' }, + { text: 'raw summary' }, + ]); }); - it('apply_compaction keeps the post-compaction tail (slice(compactedCount))', () => { + it('apply_compaction keeps the most recent user messages and drops the assistant/tool tail', () => { const entries = [ { lineNo: 1, data: { type: 'context.append_message' as const, message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'm0' }], toolCalls: [] } }, raw: {} }, { lineNo: 2, data: { type: 'context.append_message' as const, message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'm1' }], toolCalls: [] } }, raw: {} }, { lineNo: 3, data: { type: 'context.append_message' as const, - message: { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'm2 (kept)' }], toolCalls: [] } }, raw: {} }, + message: { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'm2 (dropped)' }], toolCalls: [] } }, raw: {} }, { lineNo: 4, data: { type: 'context.apply_compaction' as const, - summary: 'sum', compactedCount: 2, tokensBefore: 100, tokensAfter: 10 }, raw: {} }, + summary: 'sum', compactedCount: 3, tokensBefore: 100, tokensAfter: 10 }, raw: {} }, ]; const proj = projectContext(entries as any); - // [summary, m2] — m0 and m1 (the first compactedCount=2) are dropped, m2 kept. 
- expect(proj.messages).toHaveLength(2); - expect(proj.messages[0]!.source).toBe('compaction_summary'); - expect(proj.messages[0]!.compaction).toEqual({ compactedCount: 2, tokensBefore: 100, tokensAfter: 10 }); - expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'm2 (kept)' }); - expect(proj.messages[1]!.lineNo).toBe(3); + // [m0, m1, summary] — real user prompts are kept verbatim, the assistant + // tail is dropped. + expect(proj.messages).toHaveLength(3); + expect(proj.messages.map((m) => m.source)).toEqual([ + 'append_message', 'append_message', 'compaction_summary', + ]); + expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'm0' }); + expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'm1' }); + expect(proj.messages[2]!.compaction).toEqual({ compactedCount: 3, tokensBefore: 100, tokensAfter: 10 }); + expect(proj.messages[2]!.message.content[0]).toMatchObject({ text: 'sum' }); + }); + + it('apply_compaction drops shell/local-command/background messages in model mode only', () => { + const entries = [ + { lineNo: 1, data: { type: 'context.append_message' as const, + message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'real user' }], toolCalls: [], origin: { kind: 'user' as const } } }, raw: {} }, + { lineNo: 2, data: { type: 'context.append_message' as const, + message: { role: 'user' as const, content: [{ type: 'text' as const, text: '! 
pwd' }], toolCalls: [], origin: { kind: 'shell_command' as const, phase: 'input' as const } } }, raw: {} }, + { lineNo: 3, data: { type: 'context.append_message' as const, + message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'local output' }], toolCalls: [], origin: { kind: 'injection' as const, variant: 'local-command-stdout' } } }, raw: {} }, + { lineNo: 4, data: { type: 'context.append_message' as const, + message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'background done' }], toolCalls: [], origin: { kind: 'background_task' as const, taskId: 'task', status: 'completed' as const, notificationId: 'notification' } } }, raw: {} }, + { lineNo: 5, data: { type: 'context.append_message' as const, + message: { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'assistant reply' }], toolCalls: [] } }, raw: {} }, + { lineNo: 6, data: { type: 'context.apply_compaction' as const, + summary: 'sum', compactedCount: 5, tokensBefore: 100, tokensAfter: 10 }, raw: {} }, + { lineNo: 7, data: { type: 'context.append_message' as const, + message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'new' }], toolCalls: [], origin: { kind: 'user' as const } } }, raw: {} }, + ]; + + const model = projectContext(entries as any); + expect(model.messages.map((m) => m.source)).toEqual([ + 'append_message', 'compaction_summary', 'append_message', + ]); + expect(model.messages.map((m) => m.message.content[0])).toMatchObject([ + { text: 'real user' }, { text: 'sum' }, { text: 'new' }, + ]); + + const full = projectContext(entries as any, 'full'); + expect(full.messages.map((m) => m.source)).toEqual([ + 'append_message', 'append_message', 'append_message', 'append_message', + 'append_message', 'compaction_summary', 'append_message', + ]); + expect(full.messages.map((m) => m.message.content[0])).toMatchObject([ + { text: 'real user' }, { text: '! 
pwd' }, { text: 'local output' }, + { text: 'background done' }, { text: 'assistant reply' }, { text: 'sum' }, + { text: 'new' }, + ]); }); // ---- Fix ④: UI-only markers must not offset agent-core history indices ------ @@ -298,7 +365,7 @@ describe('context-projector', () => { // real history entries (append_message + compaction_summary), skipping // 'undo'/'clear' markers. - it('apply_compaction slices by history index, skipping a preceding undo marker (model)', () => { + it('apply_compaction keeps user messages across a preceding undo marker (model)', () => { const userMsg = (text: string) => ({ role: 'user' as const, content: [{ type: 'text' as const, text }], toolCalls: [], origin: { kind: 'user' as const }, @@ -306,14 +373,10 @@ describe('context-projector', () => { // Step 1: append u1, u2 then undo(1) → removes u2, leaves [u1, <undo>]. // Step 2: append u3, u4 → array is [u1, <undo>, u3, u4]. // History entries (agent-core _history, which has NO marker) are the three - // real messages [u1, u3, u4]. A compaction with compactedCount=2 drops the - // first 2 HISTORY entries (u1, u3) — and the undo marker that sits within - // that compacted prefix is dropped with it — keeping exactly [summary, u4]. - // - // The naive `messages.slice(compactedCount=2)` would instead cut the ARRAY at - // index 2, yielding [summary, u3, u4] — it WRONGLY retains the already- - // compacted u3 because the undo marker offset the index by one. This test - // pins the correct history-aware behaviour and FAILS against the naive slice. + // real user prompts [u1, u3, u4]. Compaction keeps all of them (they fit the + // budget) and appends the summary, dropping only the synthetic undo marker. + // This pins that the marker does not offset the kept-user selection — a naive + // array-slice would have retained the wrong prompts.
const entries = [ { lineNo: 1, data: { type: 'context.append_message' as const, message: userMsg('u1') }, raw: {} }, { lineNo: 2, data: { type: 'context.append_message' as const, message: userMsg('u2') }, raw: {} }, @@ -321,12 +384,16 @@ describe('context-projector', () => { { lineNo: 4, data: { type: 'context.append_message' as const, message: userMsg('u3') }, raw: {} }, { lineNo: 5, data: { type: 'context.append_message' as const, message: userMsg('u4') }, raw: {} }, { lineNo: 6, data: { type: 'context.apply_compaction' as const, - summary: 'sum', compactedCount: 2, tokensBefore: 100, tokensAfter: 10 }, raw: {} }, + summary: 'sum', compactedCount: 3, tokensBefore: 100, tokensAfter: 10 }, raw: {} }, ]; const proj = projectContext(entries as any); - // Correct: [summary, u4]. The marker and the first 2 history entries are gone. - expect(proj.messages.map((m) => m.source)).toEqual(['compaction_summary', 'append_message']); - expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'u4' }); + // Correct: [u1, u3, u4, summary]. The marker is gone, all real prompts kept. + expect(proj.messages.map((m) => m.source)).toEqual([ + 'append_message', 'append_message', 'append_message', 'compaction_summary', + ]); + expect(proj.messages.map((m) => m.message.content[0])).toMatchObject([ + { text: 'u1' }, { text: 'u3' }, { text: 'u4' }, { text: 'sum' }, + ]); }); it('micro-blanking uses the history index, skipping a preceding undo marker (model)', () => { @@ -675,7 +742,7 @@ describe('context-projector', () => { // marker but do NOT mutate/drop the surrounding message list. 'model' mode // (the default) keeps the existing model's-eye behaviour byte-identical. 
- it("defaults to 'model' mode when no 2nd arg is passed (compaction drops the prefix)", () => { + it("defaults to 'model' mode when no 2nd arg is passed (keeps recent user messages + summary)", () => { const entries = [ { lineNo: 1, data: { type: 'context.append_message' as const, message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'm0' }], toolCalls: [] } }, raw: {} }, @@ -684,10 +751,14 @@ describe('context-projector', () => { { lineNo: 3, data: { type: 'context.apply_compaction' as const, summary: 'sum', compactedCount: 2, tokensBefore: 100, tokensAfter: 10 }, raw: {} }, ]; - // No 2nd arg → 'model' default: prefix dropped, only the summary remains. + // No 2nd arg → 'model' default: the real user prompts are kept verbatim and + // the summary is appended after them. const proj = projectContext(entries as any); - expect(proj.messages).toHaveLength(1); - expect(proj.messages[0]!.source).toBe('compaction_summary'); + expect(proj.messages.map((m) => m.source)).toEqual([ + 'append_message', 'append_message', 'compaction_summary', + ]); + expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'm0' }); + expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'm1' }); }); it("full mode keeps the pre-compaction messages plus the summary marker plus the tail", () => { diff --git a/apps/vis/server/test/routes/context.test.ts b/apps/vis/server/test/routes/context.test.ts index 486e6175d..6352747e9 100644 --- a/apps/vis/server/test/routes/context.test.ts +++ b/apps/vis/server/test/routes/context.test.ts @@ -69,28 +69,31 @@ describe('context route', () => { cleanup = c; const app = contextRoute(home); - // Default (model view): the pre-compaction message is dropped, leaving - // [summary, after-compaction]. + // Default (model view): the real user prompt before compaction is KEPT, the + // assistant reply is dropped, then the summary, then the post-compaction tail. 
const modelRes = await app.request('/session_fixture/context?agent=main'); expect(modelRes.status).toBe(200); const modelBody = (await modelRes.json()) as { messages: { source: string; message: { content: { type: string; text?: string }[] } }[]; }; expect(modelBody.messages.map((m) => m.source)).toEqual([ - 'compaction_summary', 'append_message', + 'append_message', 'compaction_summary', 'append_message', ]); + expect(modelBody.messages[0]!.message.content[0]).toMatchObject({ text: 'before compaction' }); + expect(modelBody.messages[2]!.message.content[0]).toMatchObject({ text: 'after compaction' }); - // Full history: the pre-compaction message is KEPT, then the summary marker, - // then the post-compaction tail. + // Full history: every pre-compaction message (user prompt + assistant reply) + // is KEPT, then the summary marker, then the post-compaction tail. const fullRes = await app.request('/session_fixture/context?agent=main&history=full'); expect(fullRes.status).toBe(200); const fullBody = (await fullRes.json()) as { messages: { source: string; message: { content: { type: string; text?: string }[] } }[]; }; expect(fullBody.messages.map((m) => m.source)).toEqual([ - 'append_message', 'compaction_summary', 'append_message', + 'append_message', 'append_message', 'compaction_summary', 'append_message', ]); expect(fullBody.messages[0]!.message.content[0]).toMatchObject({ text: 'before compaction' }); - expect(fullBody.messages[2]!.message.content[0]).toMatchObject({ text: 'after compaction' }); + expect(fullBody.messages[1]!.message.content[0]).toMatchObject({ text: 'assistant reply' }); + expect(fullBody.messages[3]!.message.content[0]).toMatchObject({ text: 'after compaction' }); }); }); diff --git a/packages/agent-core/src/agent/compaction/compaction-instruction.md b/packages/agent-core/src/agent/compaction/compaction-instruction.md index 49b0d80b4..9ffd4b010 100644 --- a/packages/agent-core/src/agent/compaction/compaction-instruction.md +++ 
b/packages/agent-core/src/agent/compaction/compaction-instruction.md @@ -1,24 +1,21 @@ +You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task. --- This message is a direct task, not part of the above conversation --- -You are now given a task to compact this conversation context according to specific priorities and output requirements. +You are now given a task to compact this conversation context according to the priorities and output requirements below. -Output text only. DO NOT CALL ANY TOOLS. Calling tools will be rejected and fails the task. You already have all the information you need in the conversation history. You have only one chance. +The goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared. -The goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared work. +Compression priorities, in order: -{{ customInstruction }} - - - -1. **Current Task State**: What is being worked on RIGHT NOW -2. **Errors & Solutions**: All encountered errors and their resolutions -3. **Code Evolution**: Final working versions only (remove intermediate attempts) -4. **System Context**: Project structure, dependencies, environment setup -5. **Design Decisions**: Architectural choices and their rationale -6. **TODO Items**: Unfinished tasks and known issues +1. Current Task State: what is being worked on right now +2. Errors & Solutions: unresolved or recurring errors and their resolutions +3. Code Evolution: final working versions only; remove intermediate attempts +4. System Context: project structure, dependencies, environment setup +5. Design Decisions: architectural choices and their rationale +6. 
TODO Items: unfinished tasks and known issues - +Required output structure: ## Current Focus @@ -54,16 +51,18 @@ The goal of compaction is to keep essential code patterns, technical details, an - [Useful classes/methods/functions]: [Brief description/usage] - ... - +Omit non-critical code, intermediate attempts, and resolved errors. ## Important Context - [Any crucial information not covered above] - ... -## All User Messages +Be concise, structured, and focused on helping the next LLM seamlessly continue the work. -- [Detailed non tool use user message] -- ... +Respond with text only. Do not call any tools — you already have everything you need in the conversation history. - +{% if customInstruction %} +Optional user instruction: +{{ customInstruction }} +{% endif %} diff --git a/packages/agent-core/src/agent/compaction/compaction-summary-prefix.md b/packages/agent-core/src/agent/compaction/compaction-summary-prefix.md new file mode 100644 index 000000000..62a7161b8 --- /dev/null +++ b/packages/agent-core/src/agent/compaction/compaction-summary-prefix.md @@ -0,0 +1 @@ +Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. 
Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis: \ No newline at end of file diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts index ebd8bfe18..84a9c3502 100644 --- a/packages/agent-core/src/agent/compaction/full.ts +++ b/packages/agent-core/src/agent/compaction/full.ts @@ -22,9 +22,14 @@ import { retryBackoffDelays, sleepForRetry, } from '../../loop/retry'; -import { renderPrompt } from '../../utils/render-prompt'; +import { + renderTodoList, + TODO_STORE_KEY, + type TodoItem, +} from '../../tools/builtin/state/todo-list'; import { estimateTokens, + estimateTokensForMessage, estimateTokensForMessages, estimateTokensForTools, } from '../../utils/tokens'; @@ -32,14 +37,15 @@ import { applyCompletionBudget, resolveCompletionBudget, } from '../../utils/completion-budget'; +import { renderPrompt } from '../../utils/render-prompt'; import compactionInstructionTemplate from './compaction-instruction.md?raw'; -import { renderTodoList, type TodoItem } from '../../tools/builtin/state/todo-list'; import type { CompactionBeginData, CompactionResult } from './types'; import { DEFAULT_COMPACTION_CONFIG, DefaultCompactionStrategy, type CompactionStrategy, } from './strategy'; +import { buildCompactionSummaryText } from './memento'; export const MAX_COMPACTION_RETRY_ATTEMPTS = 5; @@ -62,6 +68,18 @@ export class FullCompaction { blockedByTurn: boolean; } | null = null; private readonly observedMaxContextTokensByModel = new Map(); + // Token count right after the last successful compaction. While no new + // content has been appended (tokenCountWithPending <= this value), the + // history is already in its minimal compacted form ([kept user prompts, + // summary]); re-compacting would only nest summaries, so + // checkAutoCompaction skips in that case even if an observed overflow + // limit still flags the context as oversized. 
+ private lastCompactedTokenCount: number | null = null; + // Counts provider-overflow recoveries in this turn that have not yet been + // followed by a successful step. Trips MAX_OVERFLOW_COMPACTION_ATTEMPTS to + // stop an overflow -> compact -> overflow loop when compaction can no + // longer shrink the request below the model window. + private consecutiveOverflowCompactions = 0; protected readonly strategy: CompactionStrategy; constructor( @@ -77,7 +95,7 @@ export class FullCompaction { reservedContextSize: agent.kimiConfig?.loopControl?.reservedContextSize ?? DEFAULT_COMPACTION_CONFIG.reservedContextSize, - } + }, ); } @@ -139,9 +157,8 @@ export class FullCompaction { }); return; } - const compactedCount = this.strategy.computeCompactCount(this.agent.context.history, data.source); - if (compactedCount === 0) { - throw new KimiError(ErrorCodes.COMPACTION_UNABLE, 'No prefix that can be compacted in current history.'); + if (this.agent.context.history.length === 0) { + throw new KimiError(ErrorCodes.COMPACTION_UNABLE, 'No messages to compact in current history.'); } this.agent.records.logRecord({ type: 'full_compaction.begin', @@ -155,7 +172,7 @@ export class FullCompaction { const abortController = new AbortController(); this.compacting = { abortController, - promise: this.compactionWorker(abortController.signal, data, compactedCount), + promise: this.compactionWorker(abortController.signal, data), blockedByTurn: false, }; } @@ -194,9 +211,20 @@ export class FullCompaction { resetForTurn(): void { this.compactionCountInTurn = 0; + this.lastCompactedTokenCount = null; + this.consecutiveOverflowCompactions = 0; } async handleOverflowError(signal: AbortSignal, error: unknown) { + this.consecutiveOverflowCompactions += 1; + const maxAttempts = this.strategy.maxOverflowCompactionAttempts; + if (this.consecutiveOverflowCompactions > maxAttempts) { + throw new KimiError( + ErrorCodes.CONTEXT_OVERFLOW, + `Compaction failed to bring the context under the model window 
after ${String(maxAttempts)} attempts.`, + { cause: error instanceof Error ? error : undefined }, + ); + } const didStartCompaction = this.beginAutoCompaction(); if (!didStartCompaction && !this.compacting) throw error; // Always block on overflow errors @@ -211,6 +239,10 @@ export class FullCompaction { } async afterStep(): Promise { + // A completed step means a generate() succeeded, so any prior + // overflow -> compact cycle produced a request that now fits; clear the + // loop guard. + this.consecutiveOverflowCompactions = 0; if (this.strategy.checkAfterStep) { this.checkAutoCompaction(false); } @@ -219,6 +251,12 @@ export class FullCompaction { private checkAutoCompaction(throwOnLimit: boolean = true): boolean { if (this.compacting) return true; + if ( + this.lastCompactedTokenCount !== null && + this.tokenCountWithPending <= this.lastCompactedTokenCount + ) { + return false; + } if (!this.strategy.shouldCompact(this.tokenCountWithPending)) return false; return this.beginAutoCompaction(throwOnLimit); } @@ -258,34 +296,21 @@ export class FullCompaction { private async compactionWorker( signal: AbortSignal, data: Readonly, - compactedCount: number, ): Promise { try { - const finalResult = { - summary: '', - compactedCount: 1, - tokensBefore: 0, - tokensAfter: 0, - }; - - for (let round = 1; ; round++) { - const result = await this.compactionRound(round, signal, data, compactedCount); - if (!result) return; - - finalResult.summary = result.summary; - finalResult.compactedCount += result.compactedCount - 1; - finalResult.tokensBefore += result.tokensBefore - finalResult.tokensAfter; - finalResult.tokensAfter = result.tokensAfter; - - if (result.tokensBefore - result.tokensAfter < 1024) break; - if (!this.strategy.shouldBlock(result.tokensAfter)) break; - compactedCount = this.strategy.computeCompactCount(this.agent.context.history, data.source); - if (compactedCount === 0) break; - } + const result = await this.compactionRound(signal, data); + if (!result) 
return; this.markCompleted(); - this.agent.emitEvent({ type: 'compaction.completed', result: finalResult }); - await this.agent.injection.injectGoal(); - this.triggerPostCompactHook(data, finalResult); + try { + await this.agent.refreshSystemPrompt(); + } catch (error) { + this.agent.log.error('failed to refresh system prompt after compaction', { error }); + } + const { contextSummary: _contextSummary, ...eventResult } = result; + void _contextSummary; + this.agent.emitEvent({ type: 'compaction.completed', result: eventResult }); + await this.agent.injection.injectAfterCompaction(); + this.triggerPostCompactHook(data, result); } catch (error) { if (isAbortError(error)) return; const blockedByTurn = this.compacting?.blockedByTurn === true; @@ -301,19 +326,31 @@ export class FullCompaction { } } + private buildInstruction(customInstruction: string | undefined): string { + return renderPrompt(compactionInstructionTemplate, { + customInstruction: customInstruction?.trim() ?? '', + }).trimEnd(); + } + + private postProcessSummary(summary: string): string { + const storeData = this.agent.tools.storeData(); + const todos = (storeData[TODO_STORE_KEY] as readonly TodoItem[] | undefined) ?? 
[]; + if (todos.length === 0) { + return summary; + } + const todoMarkdown = renderTodoList(todos, '## TODO List'); + return `${summary.trim()}\n\n${todoMarkdown}`; + } + private async compactionRound( - round: number, signal: AbortSignal, data: Readonly, - initialCompactedCount: number, - ) { + ): Promise { const startedAt = Date.now(); const originalHistory = [...this.agent.context.history]; const tokensBefore = estimateTokensForMessages(originalHistory); let retryCount = 0; try { - let compactedCount = initialCompactedCount; - await this.triggerPreCompactHook(data, tokensBefore, signal); const model = this.agent.config.model; @@ -337,15 +374,21 @@ export class FullCompaction { }), capability, }); + const instruction = this.buildInstruction(data.instruction); const delays = retryBackoffDelays(MAX_COMPACTION_RETRY_ATTEMPTS); - let usage: TokenUsage | null; - let summary: string; + let usage: TokenUsage | null = null; + let summary: string | undefined; + // Compact the whole history, trimming old messages only when the + // summarizer request itself cannot fit. Any trimmed messages are not + // covered by the produced summary; `droppedCount` reports that blind spot. + let historyForModel = originalHistory; + let droppedCount = 0; + let overflowShrinkCount = 0; while (true) { - const messagesToCompact = originalHistory.slice(0, compactedCount); const messages = [ - ...this.agent.context.project(messagesToCompact), - createUserMessage(renderPrompt(compactionInstructionTemplate, { customInstruction: data.instruction ?? 
'' })), + ...this.agent.context.project(historyForModel), + createUserMessage(instruction), ]; const estimatedCompactionRequestTokens = this.estimateRequestTokens(messages); try { @@ -371,14 +414,31 @@ export class FullCompaction { if (isContextOverflow) { this.observeContextOverflow(estimatedCompactionRequestTokens); } - if ( - isContextOverflow || + if (isContextOverflow && historyForModel.length > 1) { + overflowShrinkCount += 1; + if (overflowShrinkCount > MAX_COMPACTION_OVERFLOW_SHRINK_ATTEMPTS) { + throw error; + } + const before = historyForModel.length; + historyForModel = shrinkCompactionHistoryAfterOverflow( + historyForModel, + overflowShrinkCount, + ); + droppedCount += before - historyForModel.length; + retryCount = 0; + continue; + } + const shouldShrinkAfterEmptyOrTruncated = error instanceof CompactionTruncatedError || - error instanceof APIEmptyResponseError // e.g. think-only - ) { - compactedCount = this.strategy.reduceCompactOnOverflow(messagesToCompact); + error instanceof APIEmptyResponseError; + if (shouldShrinkAfterEmptyOrTruncated && historyForModel.length > 1) { + const before = historyForModel.length; + historyForModel = dropOldestMessageAndLeadingToolResults(historyForModel); + droppedCount += before - historyForModel.length; + retryCount = 0; + continue; } - else if (!isRetryableGenerateError(error)) { + if (!isRetryableGenerateError(error)) { throw error; } if (retryCount + 1 >= MAX_COMPACTION_RETRY_ATTEMPTS) { @@ -402,17 +462,15 @@ export class FullCompaction { } } - summary = this.postProcessSummary(summary); - - const recent = originalHistory.slice(compactedCount); - const tokensAfter = estimateTokens(summary) + estimateTokensForMessages(recent); - - const result: CompactionResult = { - summary, - compactedCount, + const rawSummary = this.postProcessSummary(summary ?? 
''); + const contextSummary = buildCompactionSummaryText(rawSummary); + const result = this.agent.context.applyCompaction({ + summary: rawSummary, + contextSummary, + compactedCount: originalHistory.length, tokensBefore, - tokensAfter, - }; + droppedCount: droppedCount === 0 ? undefined : droppedCount, + }); // Telemetry keys are snake_case, but the `context.apply_compaction` // record written below keeps its persisted camelCase field names @@ -424,22 +482,23 @@ export class FullCompaction { tokens_after: result.tokensAfter, duration_ms: Date.now() - startedAt, compacted_count: result.compactedCount, + dropped_count: result.droppedCount, retry_count: retryCount, - round, + round: 1, thinking_level: this.agent.config.thinkingLevel, ...(usage === null ? {} : { input_tokens: inputTotal(usage), output_tokens: usage.output }), }); - this.agent.context.applyCompaction(result); + this.lastCompactedTokenCount = result.tokensAfter; return result; } catch (error) { - if (isAbortError(error)) return; + if (isAbortError(error)) return undefined; this.agent.telemetry.track('compaction_failed', { source: data.source, tokens_before: tokensBefore, duration_ms: Date.now() - startedAt, - round, + round: 1, retry_count: retryCount, thinking_level: this.agent.config.thinkingLevel, error_type: error instanceof Error ? error.name : 'Unknown', @@ -478,16 +537,52 @@ export class FullCompaction { }, }); } +} - private postProcessSummary(summary: string): string { - const storeData = this.agent.tools.storeData(); - const todos = (storeData['todo'] as readonly TodoItem[] | undefined) ?? 
[]; - if (todos.length === 0) { - return summary; - } - const todoMarkdown = renderTodoList(todos, '## TODO List'); - return `${summary.trim()}\n\n${todoMarkdown}`; +const MAX_COMPACTION_OVERFLOW_SHRINK_ATTEMPTS = 3; +const COMPACTION_OVERFLOW_SHRINK_RATIOS = [0.7, 0.5, 0.35] as const; + +function shrinkCompactionHistoryAfterOverflow( + messages: readonly T[], + attempt: number, +): T[] { + if (messages.length <= 1) return messages.slice(); + const ratio = COMPACTION_OVERFLOW_SHRINK_RATIOS[ + Math.min(attempt - 1, COMPACTION_OVERFLOW_SHRINK_RATIOS.length - 1) + ]!; + const tokenBudget = Math.floor(estimateTokensForMessages(messages) * ratio); + return takeRecentMessagesWithinTokenBudget(messages, tokenBudget); +} + +function takeRecentMessagesWithinTokenBudget( + messages: readonly T[], + tokenBudget: number, +): T[] { + let start = messages.length; + let tokens = 0; + for (let i = messages.length - 1; i >= 0; i--) { + const messageTokens = estimateTokensForMessage(messages[i]!); + if (tokens + messageTokens > tokenBudget) break; + tokens += messageTokens; + start = i; + } + if (start === 0) start = 1; + return dropLeadingToolResults(messages.slice(start)); +} + +function dropOldestMessageAndLeadingToolResults( + messages: readonly T[], +): T[] { + if (messages.length <= 1) return messages.slice(); + return dropLeadingToolResults(messages.slice(1)); +} + +function dropLeadingToolResults(messages: readonly T[]): T[] { + let start = 0; + while (start < messages.length && messages[start]!.role === 'tool') { + start += 1; } + return messages.slice(start); } function extractCompactionSummary(response: GenerateResult): string { diff --git a/packages/agent-core/src/agent/compaction/index.ts b/packages/agent-core/src/agent/compaction/index.ts index 4f92ac9fe..4e209f83b 100644 --- a/packages/agent-core/src/agent/compaction/index.ts +++ b/packages/agent-core/src/agent/compaction/index.ts @@ -2,3 +2,4 @@ export * from './full'; export * from './micro'; export * from 
'./strategy'; export * from './types'; +export * from './memento'; diff --git a/packages/agent-core/src/agent/compaction/memento.ts b/packages/agent-core/src/agent/compaction/memento.ts new file mode 100644 index 000000000..061f50f05 --- /dev/null +++ b/packages/agent-core/src/agent/compaction/memento.ts @@ -0,0 +1,162 @@ +import type { ContentPart } from '@moonshot-ai/kosong'; +import { estimateTokensForMessage } from '../../utils/tokens'; +import type { PromptOrigin } from '../context/types'; +import summaryPrefixTemplate from './compaction-summary-prefix.md?raw'; + +/** + * "Memento" compaction helpers. + * + * Compaction rewrites the model context as: the most recent user messages + * (verbatim, within a token budget) followed by a single user-role summary + * that is prefixed with `COMPACTION_SUMMARY_PREFIX`. Assistant messages, + * tool calls, and tool results are dropped. These helpers apply the exact + * same rule for both the live context rewrite and the transcript reducer. + */ + +export const COMPACTION_SUMMARY_PREFIX = summaryPrefixTemplate.trimEnd(); +export const COMPACT_USER_MESSAGE_MAX_TOKENS = 20_000; + +/** + * Structural subset of kosong's `Message` that the memento helpers inspect. + * Both `ContextMessage` (the live context) and the wire-transcript reducer's + * mutable message satisfy this shape, so one set of helpers serves both + * layers without introducing a shared nominal type. `origin` is what tells + * real user input apart from injections and compaction summaries. + */ +interface MessageLike { + readonly role: string; + readonly content: readonly ContentPart[]; + readonly origin?: PromptOrigin | undefined; +} + +export type CompactionUserDisposition = 'keep' | 'drop'; + +/** + * Single source of truth for whether a user-role message survives compaction as + * genuine user input. Codex-style semantics: only real user prompts and + * user-slash skill activations are kept verbatim. 
Everything else user-role is + * either rebuilt by injectors after compaction or intentionally ephemeral, so + * it is dropped from the live context even when transcript/replay retains it + * for UI rendering. New `PromptOrigin` kinds must update this switch. + */ +export function compactionUserMessageDisposition( + origin: PromptOrigin | undefined, +): CompactionUserDisposition { + if (origin === undefined) return 'keep'; + switch (origin.kind) { + case 'user': + return 'keep'; + case 'skill_activation': + return origin.trigger === 'user-slash' ? 'keep' : 'drop'; + case 'injection': + case 'shell_command': + case 'compaction_summary': + case 'system_trigger': + case 'background_task': + case 'cron_job': + case 'cron_missed': + case 'hook_result': + case 'retry': + return 'drop'; + default: { + const _exhaustive: never = origin; + void _exhaustive; + return 'drop'; + } + } +} + +function extractText(content: readonly ContentPart[]): string { + let text = ''; + for (const part of content) { + if (part.type === 'text') { + text += part.text; + } + } + return text; +} + +export function isCompactionSummaryMessage(message: MessageLike): boolean { + return message.origin?.kind === 'compaction_summary'; +} + +/** + * Keep only genuine user input (real user prompts and user-slash skill + * activations). See `compactionUserMessageDisposition` for the full keep/drop + * policy and the rationale for each origin. 
+ */ +export function isRealUserInput(message: MessageLike): boolean { + return message.role === 'user' && compactionUserMessageDisposition(message.origin) === 'keep'; +} + +export function collectCompactableUserMessages(messages: readonly T[]): T[] { + return messages.filter( + (message) => isRealUserInput(message) && !isCompactionSummaryMessage(message), + ); +} + +function truncateTextToTokens(text: string, maxTokens: number): string { + if (maxTokens <= 0) return ''; + // Single pass: walk the string once, mirroring estimateTokens' heuristic + // (ASCII ~4 chars/token, non-ASCII ~1 char/token) and stop at the first + // code point that would push the running total over the budget. This keeps + // CJK-heavy inputs from the O(n^2) cost of re-estimating shrinking prefixes. + let asciiCount = 0; + let nonAsciiCount = 0; + let end = 0; + for (const char of text) { + if (char.codePointAt(0)! <= 127) { + asciiCount++; + } else { + nonAsciiCount++; + } + if (Math.ceil(asciiCount / 4) + nonAsciiCount > maxTokens) break; + end += char.length; + } + return text.slice(0, end); +} + +function truncateUserMessage(message: T, maxTokens: number): T { + const text = truncateTextToTokens(extractText(message.content), maxTokens); + // Spread the original message to preserve every field (notably `origin`), + // then replace the content with the truncated text and drop any tool calls. + // Real user input never carries tool calls, so clearing them is safe. The + // cast back to `T` is unavoidable here: TypeScript cannot prove that a + // spread-then-override shape still equals the generic `T`. + return { + ...message, + content: [{ type: 'text', text }], + toolCalls: [], + } as unknown as T; +} + +/** + * Keep the most recent user messages whose cumulative estimated size fits + * `maxTokens`. The oldest kept message is truncated to the remaining budget + * when it would otherwise overflow; older messages are dropped. 
+ */ +export function selectRecentUserMessages( + messages: readonly T[], + maxTokens: number = COMPACT_USER_MESSAGE_MAX_TOKENS, +): T[] { + const selected: T[] = []; + let remaining = maxTokens; + for (let i = messages.length - 1; i >= 0 && remaining > 0; i--) { + const message = messages[i]!; + const tokens = estimateTokensForMessage(message); + if (tokens <= remaining) { + selected.push(message); + remaining -= tokens; + } else { + selected.push(truncateUserMessage(message, remaining)); + break; + } + } + selected.reverse(); + return selected; +} + +export function buildCompactionSummaryText(summary: string): string { + const suffix = summary.trim(); + return `${COMPACTION_SUMMARY_PREFIX}\n${suffix.length > 0 ? suffix : '(no summary available)'}`; +} diff --git a/packages/agent-core/src/agent/compaction/strategy.ts b/packages/agent-core/src/agent/compaction/strategy.ts index edf9132e0..d409d6e8d 100644 --- a/packages/agent-core/src/agent/compaction/strategy.ts +++ b/packages/agent-core/src/agent/compaction/strategy.ts @@ -1,43 +1,48 @@ -import type { Message } from "@moonshot-ai/kosong"; -import { estimateTokensForMessage } from "../../utils/tokens"; -import type { CompactionSource } from "./types"; +import type { CompactionSource } from './types'; export interface CompactionConfig { + /** Fraction of the model context window that triggers auto-compaction. */ triggerRatio: number; + /** Fraction of the model context window that blocks the turn on compaction. */ blockRatio: number; + /** Reserved output budget; compaction triggers early to leave this much room. */ reservedContextSize: number; + /** Maximum number of auto-compactions allowed in a single turn. */ maxCompactionPerTurn: number; - maxRecentMessages: number; - maxRecentUserMessages: number; - maxRecentSizeRatio: number; - minOverflowReductionRatio: number; + /** + * Consecutive provider-overflow recoveries (overflow -> compact -> overflow + * again) allowed in a single turn before giving up. 
Caps the loop when + * compaction can no longer shrink the request below the model window. + */ + maxOverflowCompactionAttempts: number; } +/** + * Auto-compact at 85% of the resolved context window. `blockRatio` matches + * `triggerRatio` so compaction runs synchronously with no background + * compaction. + */ export const DEFAULT_COMPACTION_CONFIG: CompactionConfig = { triggerRatio: 0.85, - blockRatio: 0.85, // Same as triggerRatio to disable async compaction + blockRatio: 0.85, reservedContextSize: 50_000, maxCompactionPerTurn: Infinity, - maxRecentMessages: 4, - maxRecentUserMessages: Infinity, - maxRecentSizeRatio: 0.2, - minOverflowReductionRatio: 0.05, + maxOverflowCompactionAttempts: 3, }; export interface CompactionStrategy { shouldCompact(usedSize: number): boolean; shouldBlock(usedSize: number): boolean; - computeCompactCount(messages: readonly Message[], source: CompactionSource): number; - reduceCompactOnOverflow(messages: readonly Message[]): number; readonly checkAfterStep: boolean; readonly maxCompactionPerTurn: number; + readonly maxOverflowCompactionAttempts: number; } export class DefaultCompactionStrategy implements CompactionStrategy { constructor( protected readonly maxSizeProvider: () => number, - protected readonly config: CompactionConfig = DEFAULT_COMPACTION_CONFIG - ) { } + protected readonly config: CompactionConfig = DEFAULT_COMPACTION_CONFIG, + ) {} protected get maxSize(): number { return this.maxSizeProvider(); @@ -64,111 +69,6 @@ export class DefaultCompactionStrategy implements CompactionStrategy { return reservedSize > 0 && reservedSize < this.maxSize && usedSize + reservedSize >= this.maxSize; } - computeCompactCount(messages: readonly Message[], source: CompactionSource): number { - // Return value: N messages to be compacted (0 means no compaction possible) - // LLM Input: messages.slice(0, N) + [user:instruction] - // Preserved recent messages: messages.slice(N) - - // Manual compaction - if (source === 'manual') { - for (let 
i = messages.length - 1; i > 0; i--) { - if (canSplitAfter(messages, i)) { - return this.fitCompactCountToWindow(messages, i + 1); - } - } - return 0; - } - - // Auto compaction rules (in order of precedence): - // 1. The split after messages[N-1] must be safe per `canSplitAfter`: - // messages[N-1] is not a user or asst-with-tool-calls, and the retained - // suffix messages.slice(N) has no orphan tool result. - // 2. At least one recent message must be preserved - // 3. At most maxRecentMessages recent messages should be preserved - // 4. At most maxRecentUserMessages recent user messages should be preserved - // 5. At most maxRecentSizeRatio * maxSize recent messages should be preserved - // 6. N should be as small as possible - - let recentMessages = 1; - let recentUserMessages = 0; - let recentSize = 0; - let bestN: number | undefined; - - for (; recentMessages < messages.length; recentMessages++) { - const splitIndex = messages.length - recentMessages - 1; - const m2 = messages[messages.length - recentMessages]!; - - if (m2.role === 'user') { - recentUserMessages++; - } - recentSize += estimateTokensForMessage(m2); - - if (canSplitAfter(messages, splitIndex)) { - bestN = splitIndex + 1; - } - - const reachesMax = recentMessages >= this.config.maxRecentMessages - || recentUserMessages >= this.config.maxRecentUserMessages - || recentSize >= this.maxSize * this.config.maxRecentSizeRatio; - if (reachesMax && bestN !== undefined) { - break; - } - } - - return this.fitCompactCountToWindow(messages, bestN ?? 
0); - } - - reduceCompactOnOverflow(messages: readonly Message[]): number { - const minReducedSize = Math.max( - 1, - Math.ceil(this.maxSize * this.config.minOverflowReductionRatio), - ); - let reducedSize = 0; - let bestN: number | undefined; - - for (let i = messages.length - 2; i > 0; i--) { - reducedSize += estimateTokensForMessage(messages[i + 1]!); - if (canSplitAfter(messages, i)) { - bestN = i + 1; - if (reducedSize >= minReducedSize) { - return i + 1; - } - } - } - return bestN ?? messages.length; - } - - private fitCompactCountToWindow( - messages: readonly Message[], - compactedCount: number, - ): number { - if (this.maxSize <= 0 || compactedCount <= 0) { - return compactedCount; - } - - let compactedSize = 0; - for (let i = 0; i < compactedCount; i++) { - compactedSize += estimateTokensForMessage(messages[i]!); - } - if (compactedSize <= this.maxSize) { - return compactedCount; - } - - let bestN: number | undefined; - for (let n = compactedCount - 1; n > 0; n--) { - compactedSize -= estimateTokensForMessage(messages[n]!); - if (!canSplitAfter(messages, n - 1)) { - continue; - } - bestN = n; - if (compactedSize <= this.maxSize) { - return n; - } - } - - return bestN ?? compactedCount; - } - get checkAfterStep(): boolean { return this.config.triggerRatio !== this.config.blockRatio; } @@ -176,45 +76,10 @@ export class DefaultCompactionStrategy implements CompactionStrategy { get maxCompactionPerTurn(): number { return this.config.maxCompactionPerTurn; } -} -/** - * Decide whether a compaction split is safe to place immediately after - * `messages[index]`. A split is safe only when: - * - `messages[index]` itself is not a user message or an assistant message - * with pending tool calls (cutting either of those off from what follows - * would break the conversation), AND - * - the next message is not a tool result. 
The history is well-formed: - * tool results only appear after their owning `asst_w_tc` and all tool - * results for one exchange land consecutively before the next non-tool - * message. So if the suffix starts with a tool result, its `asst_w_tc` - * must be in the compacted prefix, which would orphan that result - * (e.g. splitting between tool_a and tool_b of a parallel call), AND - * - the compacted prefix itself does not end with an unresolved tool - * exchange, because pending tool results must remain in the retained tail. - */ -function canSplitAfter(messages: readonly Message[], index: number): boolean { - const m = messages[index]; - if (m === undefined) return false; - if (m.role === 'user') return false; - if (m.role === 'assistant' && m.toolCalls.length > 0) return false; - if (messages[index + 1]?.role === 'tool') return false; - if (prefixEndsWithOpenToolExchange(messages, index)) return false; - return true; -} - -function prefixEndsWithOpenToolExchange(messages: readonly Message[], index: number): boolean { - if (messages[index]?.role !== 'tool') return false; - - let toolResultCount = 0; - for (let i = index; i >= 0; i--) { - const message = messages[i]; - if (message === undefined) return false; - if (message.role === 'tool') { - toolResultCount++; - continue; - } - return message.role === 'assistant' && message.toolCalls.length > toolResultCount; + get maxOverflowCompactionAttempts(): number { + return this.config.maxOverflowCompactionAttempts; } - return false; } + +export type { CompactionSource }; diff --git a/packages/agent-core/src/agent/compaction/types.ts b/packages/agent-core/src/agent/compaction/types.ts index 820365cdc..cef3c5308 100644 --- a/packages/agent-core/src/agent/compaction/types.ts +++ b/packages/agent-core/src/agent/compaction/types.ts @@ -1,10 +1,46 @@ export interface CompactionResult { + /** Human-facing summary text produced by the compaction model. 
*/ summary: string; + /** + * Exact summary message stored in the live model context. It includes the + * compaction prefix that tells the next model this is handoff context rather + * than a real user prompt. Optional for backward compatibility with older + * wire records, where `summary` was also the model-context text. + */ + contextSummary?: string; compactedCount: number; tokensBefore: number; tokensAfter: number; + /** + * Number of real user messages kept verbatim ahead of the summary in the + * post-compaction live context. Written by `ContextMemory.applyCompaction` + * (the single derivation point for the post-compaction shape) so the + * wire-transcript reducer can reproduce the live folded length without + * re-deriving it from the full transcript. Optional for backward + * compatibility with older wire records. + */ + keptUserMessageCount?: number; + /** + * Number of oldest messages trimmed from the summarizer input when the + * compaction request itself overflowed the model window. These messages are + * not covered by the produced summary — a real-user message among them may + * still be retained verbatim in the live context via `keptUserMessageCount`, + * but assistant/tool messages are lost. Surfacing the count lets records and + * telemetry report the summary's blind spot honestly. Optional for backward + * compatibility with older wire records. + */ + droppedCount?: number; } +/** + * Inputs `ContextMemory.applyCompaction` needs to derive a `CompactionResult`. + * `tokensAfter` / `keptUserMessageCount` / `droppedCount` are optional: the live + * path fills in what it knows, while restore passes the persisted record so its + * historical values are preserved verbatim. 
+ */ +export type CompactionInput = Pick & + Partial>; + export type CompactionSource = 'manual' | 'auto'; export interface CompactionBeginData { diff --git a/packages/agent-core/src/agent/context/index.ts b/packages/agent-core/src/agent/context/index.ts index cf4c88395..eab7337e0 100644 --- a/packages/agent-core/src/agent/context/index.ts +++ b/packages/agent-core/src/agent/context/index.ts @@ -3,9 +3,16 @@ import { createToolMessage, type ContentPart, type Message } from '@moonshot-ai/ import type { Agent } from '..'; import { ErrorCodes, KimiError } from '../../errors'; import type { ExecutableToolResult, LoopRecordedEvent } from '../../loop'; -import { estimateTokensForMessages } from '../../utils/tokens'; +import { estimateTokens, estimateTokensForMessages } from '../../utils/tokens'; import { escapeXml } from '../../utils/xml-escape'; -import type { CompactionResult } from '../compaction'; +import { + COMPACT_USER_MESSAGE_MAX_TOKENS, + collectCompactableUserMessages, + isRealUserInput, + selectRecentUserMessages, + type CompactionInput, + type CompactionResult, +} from '../compaction'; import { project, trimTrailingOpenToolExchange } from './projector'; import { USER_PROMPT_ORIGIN, @@ -172,7 +179,7 @@ export class ContextMemory { this._tokenCount -= estimateTokensForMessages([message]); } - if (isRealUserPrompt(message)) { + if (isRealUserInput(message)) { removedUserCount++; if (removedUserCount >= count) break; } @@ -205,7 +212,36 @@ export class ContextMemory { } } - applyCompaction(result: CompactionResult): void { + applyCompaction(input: CompactionInput): CompactionResult { + // Single derivation point for the post-compaction shape: the most recent + // real user messages (verbatim, within the token budget) followed by a + // user-role summary. `tokensAfter` and `keptUserMessageCount` are derived + // here from the actual `_history` so the live context, the wire record, + // and the transcript reducer all agree — re-deriving them elsewhere (e.g. 
+ // from the full transcript, which still holds the untruncated originals of + // messages the live context truncated) would diverge. + const keptUserMessages = selectRecentUserMessages( + collectCompactableUserMessages(this._history), + COMPACT_USER_MESSAGE_MAX_TOKENS, + ); + // Live compaction omits these so they are derived from the actual + // `_history`; restore passes the persisted record so its historical values + // are preserved verbatim. Older wire records did not have `contextSummary`, + // so their `summary` remains the model-context text during restore. + const contextSummary = input.contextSummary ?? input.summary; + const tokensAfter = + input.tokensAfter ?? + estimateTokens(contextSummary) + estimateTokensForMessages(keptUserMessages); + const keptUserMessageCount = input.keptUserMessageCount ?? keptUserMessages.length; + const result: CompactionResult = { + summary: input.summary, + contextSummary, + compactedCount: input.compactedCount, + tokensBefore: input.tokensBefore, + tokensAfter, + keptUserMessageCount, + droppedCount: input.droppedCount, + }; this.agent.records.logRecord({ type: 'context.apply_compaction', ...result, @@ -213,27 +249,34 @@ export class ContextMemory { this.agent.replayBuilder.patchLast('compaction', { result: { summary: result.summary, + contextSummary: result.contextSummary, compactedCount: result.compactedCount, tokensBefore: result.tokensBefore, tokensAfter: result.tokensAfter, + keptUserMessageCount: result.keptUserMessageCount, + droppedCount: result.droppedCount, }, }); this._history = [ + ...keptUserMessages, { - role: 'assistant', - content: [{ type: 'text', text: result.summary }], + role: 'user', + content: [{ type: 'text', text: contextSummary }], toolCalls: [], origin: { kind: 'compaction_summary' }, }, - ...this._history.slice(result.compactedCount), ]; this.openSteps.clear(); - this.flushDeferredMessagesIfToolExchangeClosed(); + this.pendingToolResultIds.clear(); + // Drop deferred messages (mostly 
injections/system reminders) instead of + // flushing them: initial context is rebuilt every turn. + this.deferredMessages = []; this._tokenCount = result.tokensAfter; this.tokenCountCoveredMessageCount = this._history.length; this.agent.microCompaction.reset(); - this.agent.injection.onContextCompacted(result.compactedCount); + this.agent.injection.onContextCompacted(); this.agent.emitStatusUpdated(); + return result; } data(): AgentContextData { @@ -461,16 +504,6 @@ function isEmptyOutputText(output: string): boolean { return output.length === 0 || output.trim() === TOOL_OUTPUT_EMPTY_TEXT; } -function isRealUserPrompt(message: ContextMessage): boolean { - if (message.role !== 'user') return false; - const origin = message.origin; - if (origin === undefined || origin.kind === 'user') return true; - if (origin.kind === 'skill_activation') { - return origin.trigger === 'user-slash'; - } - return false; -} - function formatUndoUnavailableMessage( requestedCount: number, undoableCount: number, diff --git a/packages/agent-core/src/agent/index.ts b/packages/agent-core/src/agent/index.ts index 4e733a80c..e17b98c8b 100644 --- a/packages/agent-core/src/agent/index.ts +++ b/packages/agent-core/src/agent/index.ts @@ -11,7 +11,11 @@ import type { EnabledPluginSessionStart } from '#/plugin'; import type { McpConnectionManager } from '../mcp'; import { FlagResolver, type ExperimentalFlagResolver } from '../flags'; -import type { PreparedSystemPromptContext, ResolvedAgentProfile } from '../profile'; +import { + prepareSystemPromptContext, + type PreparedSystemPromptContext, + type ResolvedAgentProfile, +} from '../profile'; import type { ModelProvider } from '../session/provider-manager'; import type { SessionSubagentHost } from '../session/subagent-host'; import { noopTelemetryClient, type TelemetryClient } from '../telemetry'; @@ -82,6 +86,7 @@ export interface AgentOptions { readonly experimentalFlags?: ExperimentalFlagResolver; readonly replay?: ReplayBuilderOptions; 
readonly additionalDirs?: readonly string[]; + readonly systemPromptContextProvider?: (() => Promise) | undefined; } export class Agent { @@ -127,6 +132,9 @@ export class Agent { readonly replayBuilder: ReplayBuilder; private additionalDirs: readonly string[]; + private activeProfile?: ResolvedAgentProfile; + private brandHome?: string; + private readonly systemPromptContextProvider?: (() => Promise) | undefined; constructor(options: AgentOptions) { this.type = options.type ?? 'main'; @@ -145,6 +153,7 @@ export class Agent { this.telemetry = options.telemetry ?? noopTelemetryClient; this.experimentalFlags = options.experimentalFlags ?? new FlagResolver(); this.additionalDirs = normalizeAdditionalDirs(options.additionalDirs ?? []); + this.systemPromptContextProvider = options.systemPromptContextProvider; this.llmRequestLogger = new LlmRequestLogger(this.log); this.blobStore = options.homedir @@ -248,7 +257,41 @@ export class Agent { }); } - useProfile(profile: ResolvedAgentProfile, context?: PreparedSystemPromptContext): void { + useProfile( + profile: ResolvedAgentProfile, + context?: PreparedSystemPromptContext, + brandHome?: string, + ): void { + this.setActiveProfile(profile, brandHome); + this.updateSystemPromptFromProfile(profile, context); + this.tools.setActiveTools(profile.tools); + } + + setActiveProfile(profile: ResolvedAgentProfile, brandHome?: string): void { + this.activeProfile = profile; + this.brandHome = brandHome; + } + + /** + * Re-render the system prompt with freshly gathered runtime context (cwd + * listing, AGENTS.md, additional-dirs info, skill list). Called after + * compaction so the post-compaction turns do not keep a snapshot captured + * at session bootstrap. Invalidates the prompt-cache prefix by design. + */ + async refreshSystemPrompt(): Promise { + if (this.activeProfile === undefined) return; + const context = this.systemPromptContextProvider === undefined + ? 
await prepareSystemPromptContext(this.kaos, this.brandHome, { + additionalDirs: this.additionalDirs, + }) + : await this.systemPromptContextProvider(); + this.updateSystemPromptFromProfile(this.activeProfile, context); + } + + private updateSystemPromptFromProfile( + profile: ResolvedAgentProfile, + context?: PreparedSystemPromptContext, + ): void { const systemPrompt = profile.systemPrompt({ osEnv: this.kaos.osEnv, cwd: this.config.cwd, @@ -258,7 +301,6 @@ export class Agent { additionalDirsInfo: context?.additionalDirsInfo, }); this.config.update({ profileName: profile.name, systemPrompt }); - this.tools.setActiveTools(profile.tools); } async resume(options?: AgentRecordsReplayOptions): Promise<{ warning?: string }> { diff --git a/packages/agent-core/src/agent/injection/injector.ts b/packages/agent-core/src/agent/injection/injector.ts index 504e412de..d13e18159 100644 --- a/packages/agent-core/src/agent/injection/injector.ts +++ b/packages/agent-core/src/agent/injection/injector.ts @@ -9,11 +9,8 @@ export abstract class DynamicInjector { this.injectedAt = null; } - onContextCompacted(compactedCount: number): void { - if (this.injectedAt !== null) { - const newInjectedAt = this.injectedAt - compactedCount + 1; - this.injectedAt = newInjectedAt >= 0 ? 
newInjectedAt : null; - } + onContextCompacted(): void { + this.injectedAt = null; } onContextMessageRemoved(index: number): void { diff --git a/packages/agent-core/src/agent/injection/manager.ts b/packages/agent-core/src/agent/injection/manager.ts index 99c9cd07e..7103f2cdd 100644 --- a/packages/agent-core/src/agent/injection/manager.ts +++ b/packages/agent-core/src/agent/injection/manager.ts @@ -40,16 +40,21 @@ export class InjectionManager { await this.activeGoalInjector()?.inject(); } + async injectAfterCompaction(): Promise { + await this.injectGoal(); + await this.inject(); + } + onContextClear(): void { for (const injector of this.lifecycleInjectors()) { injector.onContextClear(); } } - onContextCompacted(compactedCount: number): void { + onContextCompacted(): void { for (const injector of this.lifecycleInjectors()) { try { - injector.onContextCompacted(compactedCount); + injector.onContextCompacted(); } catch { continue; } diff --git a/packages/agent-core/src/agent/injection/permission-mode.ts b/packages/agent-core/src/agent/injection/permission-mode.ts index 638ed6760..ffe5389ad 100644 --- a/packages/agent-core/src/agent/injection/permission-mode.ts +++ b/packages/agent-core/src/agent/injection/permission-mode.ts @@ -15,13 +15,20 @@ const AUTO_MODE_EXIT_REMINDER = [ export class PermissionModeInjector extends DynamicInjector { protected override readonly injectionVariant = 'permission_mode'; private lastMode: PermissionMode | undefined; + private refreshAfterCompaction = false; + + override onContextCompacted(): void { + this.injectedAt = null; + this.refreshAfterCompaction = true; + } getInjection(): string | undefined { const mode = this.agent.permission.mode; const previousMode = this.lastMode; - if (mode === previousMode) return undefined; + if (!this.refreshAfterCompaction && mode === previousMode) return undefined; + this.refreshAfterCompaction = false; this.lastMode = mode; if (mode === 'auto') return AUTO_MODE_ENTER_REMINDER; if (previousMode === 
'auto') return AUTO_MODE_EXIT_REMINDER; diff --git a/packages/agent-core/src/agent/turn/index.ts b/packages/agent-core/src/agent/turn/index.ts index df115b6d5..d2fb3c8c5 100644 --- a/packages/agent-core/src/agent/turn/index.ts +++ b/packages/agent-core/src/agent/turn/index.ts @@ -662,9 +662,15 @@ export class TurnFlow { }, hooks: { beforeStep: async ({ signal: stepSignal }) => { - this.flushSteerBuffer(); this.agent.microCompaction.detect(); await this.agent.fullCompaction.beforeStep(stepSignal); + // Flush steered messages (background-task / cron notifications, + // user interrupts) AFTER compaction so they land in the + // post-compaction context instead of being dropped by it. The + // keep/drop decision lives in + // `compactionUserMessageDisposition()`; these origins are not + // re-injected later, so append them only after compaction runs. + this.flushSteerBuffer(); await this.agent.injection.inject(); deduper.beginStep(); return; diff --git a/packages/agent-core/src/index.ts b/packages/agent-core/src/index.ts index 14dcec22a..ae63a8604 100644 --- a/packages/agent-core/src/index.ts +++ b/packages/agent-core/src/index.ts @@ -62,6 +62,12 @@ export type { export { AGENT_WIRE_PROTOCOL_VERSION } from './agent/records'; export type { AgentConfigUpdateData } from './agent/config'; export type { CompactionBeginData, CompactionResult } from './agent/compaction'; +export { + COMPACT_USER_MESSAGE_MAX_TOKENS, + collectCompactableUserMessages, + isRealUserInput, + selectRecentUserMessages, +} from './agent/compaction'; export type { PermissionApprovalResultRecord, PermissionMode, diff --git a/packages/agent-core/src/services/message/transcript.ts b/packages/agent-core/src/services/message/transcript.ts index e98bed516..a5ecfb7f1 100644 --- a/packages/agent-core/src/services/message/transcript.ts +++ b/packages/agent-core/src/services/message/transcript.ts @@ -3,8 +3,10 @@ * agent from its `wire.jsonl` record log. 
* * Why: `ContextMemory.applyCompaction` rewrites the in-memory history as - * `[compaction_summary, ...tail]`, so `getContext().history` only reflects the - * model's CURRENT context. The wire log, however, keeps every record. The TUI + * `[...keptUserMessages, compaction_summary]` (the most recent real user + * prompts, verbatim within a token budget, followed by a single user-role + * summary), so `getContext().history` only reflects the model's CURRENT + * context. The wire log, however, keeps every record. The TUI * shows the full transcript on resume because `ReplayBuilder` captures every * `pushHistory` during record replay and is never folded by compaction. This * module reproduces that exact view for daemon REST consumers (web), without @@ -19,8 +21,11 @@ * open assistant message; tool.result appends a * tool message with the same `` status * wrapping as `toolResultOutputForModel` - * - `context.apply_compaction` → keep the prefix, insert the summary message - * at the fold point (origin `compaction_summary`) + * - `context.apply_compaction` → keep the full history, append the + * user-role summary marker (origin + * `compaction_summary`), and recover + * `foldedLength` from the recorded + * `keptUserMessageCount` * - `context.undo` → remove tail messages exactly like * `ContextMemory.undo` (skip injections, stop at * compaction summaries / `context.clear` floors) @@ -45,6 +50,12 @@ import path from 'node:path'; import type { AgentRecord } from '../../agent/records'; import type { ContextMessage } from '../../agent/context'; import type { ExecutableToolResult, LoopRecordedEvent } from '../../loop'; +import { + COMPACT_USER_MESSAGE_MAX_TOKENS, + collectCompactableUserMessages, + isRealUserInput, + selectRecentUserMessages, +} from '../../agent/compaction'; type ContentPart = ContextMessage['content'][number]; @@ -212,7 +223,7 @@ export function reduceWireRecords(records: Iterable): { if (message.origin?.kind === 'compaction_summary') break; 
transcript.splice(i, 1); foldedLength = Math.max(0, foldedLength - 1); - if (isRealUserPrompt(message)) { + if (isRealUserInput(message)) { removedUserCount++; if (removedUserCount >= count) break; } @@ -238,22 +249,40 @@ export function reduceWireRecords(records: Iterable): { applyLoopEvent(record.event, record.time); break; case 'context.apply_compaction': { - // ContextMemory drops history[0..compactedCount] and prepends the - // summary; we keep the prefix and insert the summary at the fold - // point so the transcript shows both. - const tailLength = Math.max(0, foldedLength - record.compactedCount); - transcript.splice(Math.max(0, transcript.length - tailLength), 0, { + // Mirrors ContextMemory.applyCompaction: the live context becomes the + // most recent user messages followed by a user-role summary. The + // transcript keeps the full history and appends the summary marker; + // foldedLength tracks the post-compaction live context length. + transcript.push({ message: { - role: 'assistant', + role: 'user', content: [{ type: 'text', text: record.summary }], toolCalls: [], origin: { kind: 'compaction_summary' }, }, time: record.time, }); - foldedLength = tailLength + 1; - openSteps.clear(); - flushDeferredIfToolExchangeClosed(); + // Prefer the kept-user count recorded by the live + // ContextMemory.applyCompaction. Re-deriving it from the full + // transcript would diverge from the live context: the transcript still + // holds the untruncated originals of messages the live context may + // have truncated, and (after a clear) messages the live context no + // longer has. Only fall back to re-deriving for legacy wire records + // that predate the field. 
+ if (record.keptUserMessageCount !== undefined) { + foldedLength = record.keptUserMessageCount + 1; + } else { + const keptUserMessages = selectRecentUserMessages( + collectCompactableUserMessages(transcript.map((entry) => entry.message)), + COMPACT_USER_MESSAGE_MAX_TOKENS, + ); + foldedLength = keptUserMessages.length + 1; + } + // Drop any open tool exchange and deferred messages exactly like + // ContextMemory.applyCompaction: late tool results become orphans and + // deferred injections are not rebuilt, so pending ids must not strand + // later appends in `deferred`. + resetOpenState(); break; } case 'context.undo': @@ -272,17 +301,6 @@ export function reduceWireRecords(records: Iterable): { return { entries: transcript as TranscriptEntry[], foldedLength }; } -/** Mirrors agent-core's `isRealUserPrompt` (context undo accounting). */ -function isRealUserPrompt(message: MutableMessage): boolean { - if (message.role !== 'user') return false; - const origin = message.origin; - if (origin === undefined || origin.kind === 'user') return true; - if (origin.kind === 'skill_activation') { - return origin.trigger === 'user-slash'; - } - return false; -} - /** Mirrors agent-core's `toolResultOutputForModel` + `createToolMessage`. 
*/ function toolResultContent(result: ExecutableToolResult): ContentPart[] { const output = result.output; diff --git a/packages/agent-core/src/services/session/sessionService.ts b/packages/agent-core/src/services/session/sessionService.ts index 3b684a0dc..18e0370b4 100644 --- a/packages/agent-core/src/services/session/sessionService.ts +++ b/packages/agent-core/src/services/session/sessionService.ts @@ -1,6 +1,7 @@ import { Disposable, IInstantiationService, InstantiationType, registerSingleton } from '../../di'; import { Emitter } from '../../base/common/event'; import { ErrorCodes, KimiError } from '../../errors'; +import { isRealUserInput } from '../../agent/compaction'; import type { AgentContextData, ContextMessage } from '../../agent/context'; import type { JsonObject, ListSessionsPayload, SessionSummary } from '../../rpc'; import type { SessionMeta } from '../../session'; @@ -59,7 +60,7 @@ function canUndoHistory(history: readonly ContextMessage[], count: number): bool if (message === undefined) continue; if (message.origin?.kind === 'injection') continue; if (message.origin?.kind === 'compaction_summary') return false; - if (isRealUserPrompt(message)) { + if (isRealUserInput(message)) { found++; if (found >= count) return true; } @@ -67,13 +68,6 @@ function canUndoHistory(history: readonly ContextMessage[], count: number): bool return false; } -function isRealUserPrompt(message: ContextMessage): boolean { - if (message.role !== 'user') return false; - const origin = message.origin; - if (origin === undefined || origin.kind === 'user') return true; - return origin.kind === 'skill_activation' && origin.trigger === 'user-slash'; -} - function pageContextMessages( sessionId: string, sessionCreatedAtMs: number, diff --git a/packages/agent-core/src/session/index.ts b/packages/agent-core/src/session/index.ts index a2bb022b5..c49de7f9e 100644 --- a/packages/agent-core/src/session/index.ts +++ b/packages/agent-core/src/session/index.ts @@ -470,7 +470,7 @@ export 
class Session { this.options.kimiHomeDir, { additionalDirs: this.additionalDirs }, ); - agent.useProfile(profile, context); + agent.useProfile(profile, context, this.options.kimiHomeDir); const { agentsMdWarning } = context; if (agentsMdWarning !== undefined) { this.agentsMdWarning = agentsMdWarning; @@ -718,7 +718,8 @@ export class Session { ): Agent { const parentAgent = parentAgentId !== null ? this.getReadyAgent(parentAgentId) : undefined; const cwd = parentAgent?.config.cwd ?? this.toolKaos.getcwd(); - return new Agent({ + let agent!: Agent; + agent = new Agent({ ...config, type, kaos: this.toolKaos.withCwd(cwd), @@ -737,7 +738,14 @@ export class Session { pluginSessionStarts: type === 'main' ? this.options.pluginSessionStarts : undefined, experimentalFlags: this.experimentalFlags, additionalDirs: parentAgent?.getAdditionalDirs() ?? this.additionalDirs, + systemPromptContextProvider: () => + prepareSystemPromptContext( + this.systemContextKaos(agent.kaos.getcwd()), + this.options.kimiHomeDir, + { additionalDirs: agent.getAdditionalDirs() }, + ), }); + return agent; } private permissionOptions( @@ -810,6 +818,7 @@ export class Session { try { const agent = this.instantiateAgent(id, meta.homedir, meta.type, {}, parentAgentId); const result = await agent.resume(); + this.restoreAgentProfileHandle(agent, meta, parent?.agent); this.agents.set(id, agent); return { agent, warning: parent?.warning ?? 
result.warning }; } catch (error) { @@ -821,6 +830,34 @@ export class Session { } } + private restoreAgentProfileHandle( + agent: Agent, + meta: AgentMeta, + parentAgent: Agent | undefined, + ): void { + if (agent.config.systemPrompt === '') return; + const profile = this.resolvePersistedProfile(agent, meta, parentAgent); + if (profile === undefined) return; + agent.setActiveProfile(profile, this.options.kimiHomeDir); + } + + private resolvePersistedProfile( + agent: Agent, + meta: AgentMeta, + parentAgent: Agent | undefined, + ): ResolvedAgentProfile | undefined { + const profileName = agent.config.profileName; + if (profileName === undefined) return undefined; + if (meta.type === 'sub') { + const parentProfileName = parentAgent?.config.profileName; + return ( + DEFAULT_AGENT_PROFILES[parentProfileName ?? 'agent']?.subagents?.[profileName] ?? + DEFAULT_AGENT_PROFILES['agent']?.subagents?.[profileName] + ); + } + return DEFAULT_AGENT_PROFILES[profileName]; + } + private nextGeneratedAgentId(): string { while (true) { const id = `agent-${this.agentIdCounter++}`; diff --git a/packages/agent-core/src/session/subagent-host.ts b/packages/agent-core/src/session/subagent-host.ts index 5153acea5..01c3063f3 100644 --- a/packages/agent-core/src/session/subagent-host.ts +++ b/packages/agent-core/src/session/subagent-host.ts @@ -374,7 +374,7 @@ export class SessionSubagentHost { this.session.options.kimiHomeDir, { additionalDirs: child.getAdditionalDirs() }, ); - child.useProfile(profile, context); + child.useProfile(profile, context, this.session.options.kimiHomeDir); child.tools.inheritUserTools(parent.tools); } diff --git a/packages/agent-core/src/utils/tokens.ts b/packages/agent-core/src/utils/tokens.ts index fe567f732..af8b70152 100644 --- a/packages/agent-core/src/utils/tokens.ts +++ b/packages/agent-core/src/utils/tokens.ts @@ -1,6 +1,19 @@ import type { ContentPart, Message, Tool } from '@moonshot-ai/kosong'; -const messageTokenEstimateCache = new WeakMap(); +/** + * 
Structural subset of kosong's {@link Message} that token estimation reads. + * Accepting the subset (instead of the full `Message`) lets callers with + * message-shaped objects — such as the compaction helpers in `memento.ts`, + * which carry only `role`/`content`/`origin` — estimate tokens without an + * unsafe cast, while full `Message` values still satisfy it. + */ +interface TokenEstimatableMessage { + readonly role: string; + readonly content: readonly ContentPart[]; + readonly toolCalls?: readonly { readonly name: string; readonly arguments: unknown }[]; +} + +const messageTokenEstimateCache = new WeakMap(); /** * Estimate token count from text using a character-based heuristic. @@ -41,7 +54,7 @@ export function estimateTokensForTools(tools: readonly Tool[]): number { return total; } -export function estimateTokensForMessage(message: Message): number { +export function estimateTokensForMessage(message: TokenEstimatableMessage): number { const cached = messageTokenEstimateCache.get(message); if (cached !== undefined) { return cached; diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts index 8adf12c6e..284c48cdd 100644 --- a/packages/agent-core/test/agent/compaction/full.test.ts +++ b/packages/agent-core/test/agent/compaction/full.test.ts @@ -18,10 +18,14 @@ import { afterEach, describe, expect, it, vi } from 'vitest'; import type { KimiConfig } from '../../../src/config'; import type { AgentOptions } from '../../../src/agent'; -import { DefaultCompactionStrategy, type CompactionStrategy } from '../../../src/agent/compaction'; +import { + COMPACTION_SUMMARY_PREFIX, + DefaultCompactionStrategy, + type CompactionStrategy, +} from '../../../src/agent/compaction'; import { FLAG_DEFINITIONS, MASTER_ENV } from '../../../src/flags'; import { HookEngine, type HookEngineTriggerArgs } from '../../../src/session/hooks'; -import { estimateTokensForMessages } from '../../../src/utils/tokens'; +import { 
estimateTokens, estimateTokensForMessages } from '../../../src/utils/tokens'; import { recordingTelemetry, type TelemetryRecord } from '../../fixtures/telemetry'; import type { TestAgentContext, TestAgentOptions } from '../harness/agent'; import { testAgent } from '../harness/agent'; @@ -44,138 +48,6 @@ const CATALOGUED_MODEL_CAPABILITIES = { const MICRO_COMPACTION_FLAG_ENV = getMicroCompactionFlagEnv(); describe('FullCompaction', () => { - it('keeps an oversized trailing user message as recent', () => { - const strategy = testCompactionStrategy(); - const messages = [ - textMessage('user', 'old user'), - textMessage('assistant', 'old assistant'), - textMessage('user', `pending user ${'x'.repeat(1_200)}`), - ]; - - expect(strategy.computeCompactCount(messages, 'auto')).toBe(2); - }); - - it('keeps consecutive trailing user messages as recent', () => { - const strategy = testCompactionStrategy(); - const messages = [ - textMessage('user', 'old user'), - textMessage('assistant', 'old assistant'), - textMessage('user', `pending user one ${'x'.repeat(1_200)}`), - textMessage('user', `pending user two ${'x'.repeat(1_200)}`), - ]; - - expect(strategy.computeCompactCount(messages, 'auto')).toBe(2); - }); - - it('compacts the prefix when the trailing exchange itself is oversized', () => { - const strategy = testCompactionStrategy(); - const messages = [ - textMessage('user', 'old user'), - textMessage('assistant', 'old assistant'), - textMessage('user', 'recent user'), - textMessage('assistant', `recent assistant ${'x'.repeat(1_200)}`), - ]; - - expect(strategy.computeCompactCount(messages, 'auto')).toBe(2); - }); - - it('returns 0 when there is nothing to compact', () => { - const strategy = testCompactionStrategy(); - expect(strategy.computeCompactCount([], 'auto')).toBe(0); - expect(strategy.computeCompactCount([textMessage('user', 'only pending')], 'auto')).toBe(0); - expect( - strategy.computeCompactCount( - [ - textMessage('user', 'a'), - textMessage('user', 'b'), - 
textMessage('user', 'c'), - ], - 'auto', - ), - ).toBe(0); - }); - - it('returns 0 when no intermediate split exists and the last message is also unsplittable', () => { - const strategy = testCompactionStrategy(); - const messages: Message[] = [ - textMessage('user', 'inspect'), - { - role: 'assistant', - content: [], - toolCalls: [{ type: 'function', id: 'call_a', name: 'Lookup', arguments: '{}' }], - }, - ]; - - expect(strategy.computeCompactCount(messages, 'auto')).toBe(0); - }); - - it('does not split inside a parallel tool exchange', () => { - const strategy = testCompactionStrategy(); - const messages: Message[] = [ - textMessage('user', 'old user'), - textMessage('assistant', 'old assistant'), - textMessage('user', 'run both tools'), - { - role: 'assistant', - content: [], - toolCalls: [ - { type: 'function', id: 'call_a', name: 'Lookup', arguments: '{}' }, - { type: 'function', id: 'call_b', name: 'Lookup', arguments: '{}' }, - ], - }, - { role: 'tool', content: [{ type: 'text', text: 'a' }], toolCalls: [], toolCallId: 'call_a' }, - { role: 'tool', content: [{ type: 'text', text: 'b' }], toolCalls: [], toolCallId: 'call_b' }, - textMessage('user', 'next prompt'), - ]; - - // The only valid split is before the parallel exchange (after 'old assistant'), - // never between tool_a and tool_b — that would leave tool_b as an orphan. 
- expect(strategy.computeCompactCount(messages, 'auto')).toBe(2); - }); - - it('reserves response context by default before the ratio threshold is reached', () => { - const strategy = new DefaultCompactionStrategy(() => 256_000); - - expect(strategy.shouldCompact(210_000)).toBe(true); - expect(strategy.shouldBlock(210_000)).toBe(true); - }); - - it('backs off overflow compaction by at least five percent of the context window', () => { - const strategy = testCompactionStrategy(1_000); - const messages = [ - textMessage('user', 'old user'), - textMessage('assistant', 'old assistant'), - ...Array.from({ length: 20 }, () => [ - textMessage('user', 'continue'), - textMessage('assistant', ''), - ]).flat(), - ]; - - const reduced = strategy.reduceCompactOnOverflow(messages); - const removed = messages.slice(reduced); - - expect(reduced).toBeGreaterThan(0); - expect(estimateTokensForMessages(removed)).toBeGreaterThanOrEqual(50); - }); - - it('ignores reserved context when the reserve is not smaller than the model window', () => { - const strategy = new DefaultCompactionStrategy(() => 32_000, { - triggerRatio: 0.85, - blockRatio: 0.85, - reservedContextSize: 50_000, - maxCompactionPerTurn: 3, - maxRecentMessages: 3, - maxRecentUserMessages: Infinity, - maxRecentSizeRatio: 0.2, - minOverflowReductionRatio: 0.05, - }); - - expect(strategy.shouldCompact(1)).toBe(false); - expect(strategy.shouldBlock(1)).toBe(false); - expect(strategy.shouldCompact(28_000)).toBe(true); - expect(strategy.shouldBlock(28_000)).toBe(true); - }); - it('runs manual compaction and applies the compacted context', async () => { const records: TelemetryRecord[] = []; const ctx = testAgent({ telemetry: recordingTelemetry(records) }); @@ -204,12 +76,12 @@ describe('FullCompaction', () => { [wire] context.append_message { "message": { "role": "user", "content": [ { "type": "text", "text": "recent user three" } ], "toolCalls": [], "origin": { "kind": "user" } }, "time": "