From 74ff07f15ba6eaf35464ac44c2d025f1b05e7aa8 Mon Sep 17 00:00:00 2001
From: 7Sageer <7sageer@djwcb.cn>
Date: Thu, 25 Jun 2026 20:15:17 +0800
Subject: [PATCH 01/16] feat(agent-core): rework compaction to keep only user
 prompts and summary

Compact the whole history, keeping only real user prompts within a 20k token budget followed by a user-role summary prefixed with COMPACTION_SUMMARY_PREFIX. Replace the compaction prompt with a short handoff-summary instruction, trigger auto-compaction at 90% of the context window, and drop assistant/tool messages and deferred injections on compaction.
---
 .changeset/rework-compaction-strategy.md      |   5 +
 .../compaction/compaction-instruction.md      |  74 +--
 .../compaction/compaction-summary-prefix.md   |   1 +
 .../agent-core/src/agent/compaction/full.ts   | 116 ++--
 .../agent-core/src/agent/compaction/index.ts  |   1 +
 .../src/agent/compaction/memento.ts           | 113 ++++
 .../src/agent/compaction/strategy.ts          | 177 +----
 .../agent-core/src/agent/context/index.ts     |  20 +-
 .../src/services/message/transcript.ts        |  23 +-
 .../test/agent/compaction/full.test.ts        | 608 ++++++------------
 .../test/agent/compaction/memento.test.ts     |  99 +++
 .../test/agent/compaction/micro.test.ts       |   8 +-
 .../test/agent/compaction/strategy.test.ts    | 208 ++----
 .../agent-core/test/agent/context.test.ts     |  46 +-
 packages/agent-core/test/agent/resume.test.ts |   9 +-
 .../test/prompt-placeholders.test.ts          |   1 -
 .../test/services/message-transcript.test.ts  |  42 +-
 packages/server/test/sessions.e2e.test.ts     |   2 +-
 .../server/test/snapshotService.unit.test.ts  |   6 +-
 19 files changed, 624 insertions(+), 935 deletions(-)
 create mode 100644 .changeset/rework-compaction-strategy.md
 create mode 100644 packages/agent-core/src/agent/compaction/compaction-summary-prefix.md
 create mode 100644 packages/agent-core/src/agent/compaction/memento.ts
 create mode 100644 packages/agent-core/test/agent/compaction/memento.test.ts

diff --git a/.changeset/rework-compaction-strategy.md b/.changeset/rework-compaction-strategy.md
new file mode 100644
index 000000000..6b42303d2
--- /dev/null
+++ b/.changeset/rework-compaction-strategy.md
@@ -0,0 +1,5 @@
+---
+"@moonshot-ai/kimi-code": minor
+---
+
+Rework conversation compaction to keep only real user prompts followed by a user-role summary, dropping assistant and tool messages.
diff --git a/packages/agent-core/src/agent/compaction/compaction-instruction.md b/packages/agent-core/src/agent/compaction/compaction-instruction.md
index 49b0d80b4..42fae605d 100644
--- a/packages/agent-core/src/agent/compaction/compaction-instruction.md
+++ b/packages/agent-core/src/agent/compaction/compaction-instruction.md
@@ -1,69 +1,9 @@
+You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.
 
---- This message is a direct task, not part of the above conversation ---
+Include:
+- Current progress and key decisions made
+- Important context, constraints, or user preferences
+- What remains to be done (clear next steps)
+- Any critical data, examples, or references needed to continue
 
-You are now given a task to compact this conversation context according to specific priorities and output requirements.
-
-Output text only. DO NOT CALL ANY TOOLS. Calling tools will be rejected and fails the task. You already have all the information you need in the conversation history. You have only one chance.
-
-The goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared work.
-
-{{ customInstruction }}
-
-<!-- Compression Priorities (in order) -->
-
-1. **Current Task State**: What is being worked on RIGHT NOW
-2. **Errors & Solutions**: All encountered errors and their resolutions
-3. **Code Evolution**: Final working versions only (remove intermediate attempts)
-4. **System Context**: Project structure, dependencies, environment setup
-5. **Design Decisions**: Architectural choices and their rationale
-6. **TODO Items**: Unfinished tasks and known issues
-
-<!-- Required Output Structure -->
-
-## Current Focus
-
-[What we're working on now]
-
-## Environment
-
-- [Key setup/config points]
-- ...
-
-## Completed Tasks
-
-- [Task]: [Brief outcome]
-- ...
-
-## Active Issues
-
-- [Issue]: [Status/Next steps]
-- ...
-
-## Code State
-
-### [Critical file name]
-
-[Brief description of the file's purpose and current state]
-
-```
-[The latest version of critical code snippets in this file, <20 lines]
-```
-
-### [Critical file name]
-
-- [Useful classes/methods/functions]: [Brief description/usage]
-- ...
-
-<!-- Omit non-critical code, intermediate attempts, and resolved errors -->
-
-## Important Context
-
-- [Any crucial information not covered above]
-- ...
-
-## All User Messages
-
-- [Detailed non tool use user message]
-- ...
-
-<!-- Must output a summary matching the above template in the **final answer**, not in thinking. -->
+Be concise, structured, and focused on helping the next LLM seamlessly continue the work.
diff --git a/packages/agent-core/src/agent/compaction/compaction-summary-prefix.md b/packages/agent-core/src/agent/compaction/compaction-summary-prefix.md
new file mode 100644
index 000000000..62a7161b8
--- /dev/null
+++ b/packages/agent-core/src/agent/compaction/compaction-summary-prefix.md
@@ -0,0 +1 @@
+Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:
\ No newline at end of file
diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts
index 2d608bae5..5380181dc 100644
--- a/packages/agent-core/src/agent/compaction/full.ts
+++ b/packages/agent-core/src/agent/compaction/full.ts
@@ -19,7 +19,6 @@ import {
   retryBackoffDelays,
   sleepForRetry,
 } from '../../loop/retry';
-import { renderPrompt } from '../../utils/render-prompt';
 import {
   estimateTokens,
   estimateTokensForMessages,
@@ -29,13 +28,18 @@ import {
   resolveCompletionBudget,
 } from '../../utils/completion-budget';
 import compactionInstructionTemplate from './compaction-instruction.md?raw';
-import { renderTodoList, type TodoItem } from '../../tools/builtin/state/todo-list';
 import type { CompactionBeginData, CompactionResult } from './types';
 import {
   DEFAULT_COMPACTION_CONFIG,
   DefaultCompactionStrategy,
   type CompactionStrategy,
 } from './strategy';
+import {
+  COMPACT_USER_MESSAGE_MAX_TOKENS,
+  buildCompactionSummaryText,
+  collectCompactableUserMessages,
+  selectRecentUserMessages,
+} from './memento';
 
 export const MAX_COMPACTION_RETRY_ATTEMPTS = 5;
 
@@ -68,7 +72,7 @@ export class FullCompaction {
           reservedContextSize:
             agent.kimiConfig?.loopControl?.reservedContextSize ??
             DEFAULT_COMPACTION_CONFIG.reservedContextSize,
-        }
+        },
       );
   }
 
@@ -91,9 +95,8 @@ export class FullCompaction {
       });
       return;
     }
-    const compactedCount = this.strategy.computeCompactCount(this.agent.context.history, data.source);
-    if (compactedCount === 0) {
-      throw new KimiError(ErrorCodes.COMPACTION_UNABLE, 'No prefix that can be compacted in current history.');
+    if (this.agent.context.history.length === 0) {
+      throw new KimiError(ErrorCodes.COMPACTION_UNABLE, 'No messages to compact in current history.');
     }
     this.agent.records.logRecord({
       type: 'full_compaction.begin',
@@ -107,7 +110,7 @@ export class FullCompaction {
     const abortController = new AbortController();
     this.compacting = {
       abortController,
-      promise: this.compactionWorker(abortController.signal, data, compactedCount),
+      promise: this.compactionWorker(abortController.signal, data),
       blockedByTurn: false,
     };
   }
@@ -202,34 +205,14 @@ export class FullCompaction {
   private async compactionWorker(
     signal: AbortSignal,
     data: Readonly<CompactionBeginData>,
-    compactedCount: number,
   ): Promise<void> {
     try {
-      const finalResult = {
-        summary: '',
-        compactedCount: 1,
-        tokensBefore: 0,
-        tokensAfter: 0,
-      };
-
-      for (let round = 1; ; round++) {
-        const result = await this.compactionRound(round, signal, data, compactedCount);
-        if (!result) return;
-
-        finalResult.summary = result.summary;
-        finalResult.compactedCount += result.compactedCount - 1;
-        finalResult.tokensBefore += result.tokensBefore - finalResult.tokensAfter;
-        finalResult.tokensAfter = result.tokensAfter;
-
-        if (result.tokensBefore - result.tokensAfter < 1024) break;
-        if (!this.strategy.shouldBlock(result.tokensAfter)) break;
-        compactedCount = this.strategy.computeCompactCount(this.agent.context.history, data.source);
-        if (compactedCount === 0) break;
-      }
+      const result = await this.compactionRound(signal, data);
+      if (!result) return;
       this.markCompleted();
-      this.agent.emitEvent({ type: 'compaction.completed', result: finalResult });
+      this.agent.emitEvent({ type: 'compaction.completed', result });
       await this.agent.injection.injectGoal();
-      this.triggerPostCompactHook(data, finalResult);
+      this.triggerPostCompactHook(data, result);
     } catch (error) {
       if (isAbortError(error)) return;
       const blockedByTurn = this.compacting?.blockedByTurn === true;
@@ -245,19 +228,23 @@ export class FullCompaction {
     }
   }
 
+  // Returns the compaction template with trailing whitespace removed, plus a
+  // blank line and `customInstruction` when that is non-blank.
+  private buildInstruction(customInstruction: string | undefined): string {
+    const template = compactionInstructionTemplate.trimEnd();
+    const isBlank = customInstruction === undefined || customInstruction.trim().length === 0;
+    return isBlank ? template : `${template}\n\n${customInstruction}`;
+  }
+
   private async compactionRound(
-    round: number,
     signal: AbortSignal,
     data: Readonly<CompactionBeginData>,
-    initialCompactedCount: number,
-  ) {
+  ): Promise<CompactionResult | undefined> {
     const startedAt = Date.now();
     const originalHistory = [...this.agent.context.history];
     const tokensBefore = estimateTokensForMessages(originalHistory);
     let retryCount = 0;
     try {
-      let compactedCount = initialCompactedCount;
-
       await this.triggerPreCompactHook(data, tokensBefore, signal);
 
       const model = this.agent.config.model;
@@ -268,15 +255,20 @@ export class FullCompaction {
         }),
         capability: this.agent.config.modelCapabilities,
       });
+      const instruction = this.buildInstruction(data.instruction);
 
       const delays = retryBackoffDelays(MAX_COMPACTION_RETRY_ATTEMPTS);
-      let usage: TokenUsage | null;
-      let summary: string;
+      let usage: TokenUsage | null = null;
+      let summary: string | undefined;
+      // Compact the whole history. On overflow, drop the oldest item and retry
+      // so the most recent context is what survives. `historyForModel` is the
+      // (possibly trimmed) view sent to the model; the kept user messages are
+      // always selected from the untouched `originalHistory`.
+      let historyForModel = originalHistory;
       while (true) {
-        const messagesToCompact = originalHistory.slice(0, compactedCount);
         const messages = [
-          ...this.agent.context.project(messagesToCompact),
-          createUserMessage(renderPrompt(compactionInstructionTemplate, { customInstruction: data.instruction ?? '' })),
+          ...this.agent.context.project(historyForModel),
+          createUserMessage(instruction),
         ];
         try {
           const response = await this.agent.generate(
@@ -294,14 +286,16 @@ export class FullCompaction {
           summary = extractCompactionSummary(response);
           break;
         } catch (error) {
-          if (
+          const isOverflow =
             error instanceof APIContextOverflowError ||
             error instanceof CompactionTruncatedError ||
-            error instanceof APIEmptyResponseError // e.g. think-only
-          ) {
-            compactedCount = this.strategy.reduceCompactOnOverflow(messagesToCompact);
+            error instanceof APIEmptyResponseError;
+          if (isOverflow && historyForModel.length > 1) {
+            historyForModel = historyForModel.slice(1);
+            retryCount = 0;
+            continue;
           }
-          else if (!isRetryableGenerateError(error)) {
+          if (!isRetryableGenerateError(error)) {
             throw error;
           }
           if (retryCount + 1 >= MAX_COMPACTION_RETRY_ATTEMPTS) {
@@ -325,14 +319,16 @@ export class FullCompaction {
         }
       }
 
-      summary = this.postProcessSummary(summary);
-
-      const recent = originalHistory.slice(compactedCount);
-      const tokensAfter = estimateTokens(summary) + estimateTokensForMessages(recent);
+      const summaryText = buildCompactionSummaryText(summary ?? '');
+      const keptUserMessages = selectRecentUserMessages(
+        collectCompactableUserMessages(originalHistory),
+        COMPACT_USER_MESSAGE_MAX_TOKENS,
+      );
+      const tokensAfter = estimateTokens(summaryText) + estimateTokensForMessages(keptUserMessages);
 
       const result: CompactionResult = {
-        summary,
-        compactedCount,
+        summary: summaryText,
+        compactedCount: originalHistory.length,
         tokensBefore,
         tokensAfter,
       };
@@ -343,7 +339,7 @@ export class FullCompaction {
         duration: Date.now() - startedAt,
         compactedCount: result.compactedCount,
         retryCount,
-        round,
+        round: 1,
         thinkingLevel: this.agent.config.thinkingLevel,
         ...usage,
         ...data,
@@ -351,12 +347,12 @@ export class FullCompaction {
       this.agent.context.applyCompaction(result);
       return result;
     } catch (error) {
-      if (isAbortError(error)) return;
+      if (isAbortError(error)) return undefined;
       this.agent.telemetry.track('compaction_failed', {
         ...data,
         tokensBefore,
         duration: Date.now() - startedAt,
-        round,
+        round: 1,
         retryCount,
         thinkingLevel: this.agent.config.thinkingLevel,
         errorType: error instanceof Error ? error.name : 'Unknown',
@@ -395,16 +391,6 @@ export class FullCompaction {
       },
     });
   }
-
-  private postProcessSummary(summary: string): string {
-    const storeData = this.agent.tools.storeData();
-    const todos = (storeData['todo'] as readonly TodoItem[] | undefined) ?? [];
-    if (todos.length === 0) {
-      return summary;
-    }
-    const todoMarkdown = renderTodoList(todos, '## TODO List');
-    return `${summary.trim()}\n\n${todoMarkdown}`;
-  }
 }
 
 function extractCompactionSummary(response: GenerateResult): string {
diff --git a/packages/agent-core/src/agent/compaction/index.ts b/packages/agent-core/src/agent/compaction/index.ts
index 4f92ac9fe..4e209f83b 100644
--- a/packages/agent-core/src/agent/compaction/index.ts
+++ b/packages/agent-core/src/agent/compaction/index.ts
@@ -2,3 +2,4 @@ export * from './full';
 export * from './micro';
 export * from './strategy';
 export * from './types';
+export * from './memento';
diff --git a/packages/agent-core/src/agent/compaction/memento.ts b/packages/agent-core/src/agent/compaction/memento.ts
new file mode 100644
index 000000000..2af6a0abf
--- /dev/null
+++ b/packages/agent-core/src/agent/compaction/memento.ts
@@ -0,0 +1,113 @@
+import { estimateTokens, estimateTokensForMessage } from '../../utils/tokens';
+import summaryPrefixTemplate from './compaction-summary-prefix.md?raw';
+
+/**
+ * "Memento" compaction helpers.
+ *
+ * Compaction rewrites the model context as: the most recent user messages
+ * (within a token budget; the oldest kept one may be truncated) followed by
+ * a single user-role summary prefixed with `COMPACTION_SUMMARY_PREFIX`.
+ * Assistant messages, tool calls, and tool results are dropped. The live
+ * context rewrite and the transcript reducer share these helpers.
+ */
+
+export const COMPACTION_SUMMARY_PREFIX = summaryPrefixTemplate.trimEnd();
+export const COMPACT_USER_MESSAGE_MAX_TOKENS = 20_000;
+
+interface ContentPartLike { // minimal structural view of one content part
+  readonly type: string; // extractText only reads parts whose type is 'text'
+  readonly text?: string;
+}
+
+interface MessageLike { // minimal structural view of a history message
+  readonly role: string;
+  readonly content: readonly ContentPartLike[];
+  readonly origin?: { readonly kind: string; readonly trigger?: string } | undefined;
+}
+
+/** Joins the `text` of every text part in `content`, in order, with no separator. */
+function extractText(content: readonly ContentPartLike[]): string {
+  const pieces: string[] = [];
+  for (const { type, text } of content) {
+    if (type !== 'text' || typeof text !== 'string') continue;
+    pieces.push(text);
+  }
+  return pieces.join('');
+}
+
+/** True when tagged `compaction_summary` or when the text opens with the summary prefix line. */
+export function isCompactionSummaryMessage(message: MessageLike): boolean {
+  return message.origin?.kind === 'compaction_summary' || extractText(message.content).startsWith(COMPACTION_SUMMARY_PREFIX + '\n');
+}
+
+/**
+ * Reports whether `message` is genuine user input: a user-role message with no
+ * origin, a `user` origin, or a `skill_activation` origin whose trigger is
+ * `user-slash`. Any other origin or role is rejected.
+ */
+export function isRealUserInput(message: MessageLike): boolean {
+  const { role, origin } = message;
+  if (role !== 'user') return false;
+  if (origin === undefined) return true;
+  switch (origin.kind) {
+    case 'user': return true;
+    case 'skill_activation': return origin.trigger === 'user-slash';
+    default: return false;
+  }
+}
+
+/** Filters `messages` down to real user input, excluding compaction summaries. */
+export function collectCompactableUserMessages<T extends MessageLike>(messages: readonly T[]): T[] {
+  const isCompactable = (m: T): boolean => isRealUserInput(m) && !isCompactionSummaryMessage(m);
+  return messages.filter(isCompactable);
+}
+
+/** Cuts `text` to a prefix whose estimated size fits `maxTokens`, without splitting a surrogate pair. */
+function truncateTextToTokens(text: string, maxTokens: number): string {
+  if (maxTokens <= 0) return '';
+  if (estimateTokens(text) <= maxTokens) return text;
+  let end = Math.min(text.length, maxTokens * 4); // upper bound on the cut point before shrinking
+  while (end > 0 && estimateTokens(text.slice(0, end)) > maxTokens) end--;
+  if (end > 0 && (text.charCodeAt(end - 1) & 0xfc00) === 0xd800) end--; // don't strand a high surrogate
+  return text.slice(0, end);
+}
+
+/** Copies `message` with content reduced to one text part cut to `maxTokens` and with `toolCalls` emptied. */
+function truncateUserMessage<T extends MessageLike>(message: T, maxTokens: number): T {
+  const fullText = extractText(message.content);
+  const shortened = truncateTextToTokens(fullText, maxTokens);
+  const textPart = { type: 'text', text: shortened };
+  const copy = { ...message, content: [textPart], toolCalls: [] };
+  return copy as unknown as T;
+}
+
+/**
+ * Keep the messages nearest the end of `messages` (the most recent) whose
+ * cumulative estimated size fits `maxTokens`, in their original order. The
+ * oldest kept message is truncated to the remaining budget when it would
+ * overflow (and skipped if no text survives); older messages are dropped.
+ */
+export function selectRecentUserMessages<T extends MessageLike>(
+  messages: readonly T[],
+  maxTokens: number = COMPACT_USER_MESSAGE_MAX_TOKENS,
+): T[] {
+  const selected: T[] = [];
+  let remaining = maxTokens;
+  for (let i = messages.length - 1; i >= 0 && remaining > 0; i--) { // newest first
+    const message = messages[i]!;
+    const tokens = estimateTokensForMessage(message as never);
+    if (tokens > remaining) {
+      const truncated = truncateUserMessage(message, remaining);
+      if (extractText(truncated.content).length > 0) selected.push(truncated);
+      break;
+    }
+    selected.push(message);
+    remaining -= tokens;
+  }
+  return selected.reverse();
+}
+
+/** Prefixes the trimmed `summary` (or a placeholder when it is blank) with the summary prefix and a newline. */
+export function buildCompactionSummaryText(summary: string): string {
+  return `${COMPACTION_SUMMARY_PREFIX}\n${summary.trim() || '(no summary available)'}`;
+}
diff --git a/packages/agent-core/src/agent/compaction/strategy.ts b/packages/agent-core/src/agent/compaction/strategy.ts
index edf9132e0..faff72c0f 100644
--- a/packages/agent-core/src/agent/compaction/strategy.ts
+++ b/packages/agent-core/src/agent/compaction/strategy.ts
@@ -1,34 +1,31 @@
-import type { Message } from "@moonshot-ai/kosong";
-import { estimateTokensForMessage } from "../../utils/tokens";
-import type { CompactionSource } from "./types";
+import type { CompactionSource } from './types';
 
 export interface CompactionConfig {
+  /** Fraction of the model context window that triggers auto-compaction. */
   triggerRatio: number;
+  /** Fraction of the model context window that blocks the turn on compaction. */
   blockRatio: number;
+  /** Reserved output budget; compaction triggers early to leave this much room. */
   reservedContextSize: number;
+  /** Maximum number of auto-compactions allowed in a single turn. */
   maxCompactionPerTurn: number;
-  maxRecentMessages: number;
-  maxRecentUserMessages: number;
-  maxRecentSizeRatio: number;
-  minOverflowReductionRatio: number;
 }
 
+/**
+ * Auto-compact at 90% of the resolved context window. `blockRatio` matches
+ * `triggerRatio` so compaction runs synchronously with no background
+ * compaction.
+ */
 export const DEFAULT_COMPACTION_CONFIG: CompactionConfig = {
-  triggerRatio: 0.85,
-  blockRatio: 0.85, // Same as triggerRatio to disable async compaction
+  triggerRatio: 0.9,
+  blockRatio: 0.9,
   reservedContextSize: 50_000,
   maxCompactionPerTurn: Infinity,
-  maxRecentMessages: 4,
-  maxRecentUserMessages: Infinity,
-  maxRecentSizeRatio: 0.2,
-  minOverflowReductionRatio: 0.05,
 };
 
 export interface CompactionStrategy {
   shouldCompact(usedSize: number): boolean;
   shouldBlock(usedSize: number): boolean;
-  computeCompactCount(messages: readonly Message[], source: CompactionSource): number;
-  reduceCompactOnOverflow(messages: readonly Message[]): number;
   readonly checkAfterStep: boolean;
   readonly maxCompactionPerTurn: number;
 }
@@ -36,8 +33,8 @@ export interface CompactionStrategy {
 export class DefaultCompactionStrategy implements CompactionStrategy {
   constructor(
     protected readonly maxSizeProvider: () => number,
-    protected readonly config: CompactionConfig = DEFAULT_COMPACTION_CONFIG
-  ) { }
+    protected readonly config: CompactionConfig = DEFAULT_COMPACTION_CONFIG,
+  ) {}
 
   protected get maxSize(): number {
     return this.maxSizeProvider();
@@ -64,111 +61,6 @@ export class DefaultCompactionStrategy implements CompactionStrategy {
     return reservedSize > 0 && reservedSize < this.maxSize && usedSize + reservedSize >= this.maxSize;
   }
 
-  computeCompactCount(messages: readonly Message[], source: CompactionSource): number {
-    // Return value: N messages to be compacted (0 means no compaction possible)
-    // LLM Input: messages.slice(0, N) + [user:instruction]
-    // Preserved recent messages: messages.slice(N)
-
-    // Manual compaction
-    if (source === 'manual') {
-      for (let i = messages.length - 1; i > 0; i--) {
-        if (canSplitAfter(messages, i)) {
-          return this.fitCompactCountToWindow(messages, i + 1);
-        }
-      }
-      return 0;
-    }
-
-    // Auto compaction rules (in order of precedence):
-    // 1. The split after messages[N-1] must be safe per `canSplitAfter`:
-    //    messages[N-1] is not a user or asst-with-tool-calls, and the retained
-    //    suffix messages.slice(N) has no orphan tool result.
-    // 2. At least one recent message must be preserved
-    // 3. At most maxRecentMessages recent messages should be preserved
-    // 4. At most maxRecentUserMessages recent user messages should be preserved
-    // 5. At most maxRecentSizeRatio * maxSize recent messages should be preserved
-    // 6. N should be as small as possible
-
-    let recentMessages = 1;
-    let recentUserMessages = 0;
-    let recentSize = 0;
-    let bestN: number | undefined;
-
-    for (; recentMessages < messages.length; recentMessages++) {
-      const splitIndex = messages.length - recentMessages - 1;
-      const m2 = messages[messages.length - recentMessages]!;
-
-      if (m2.role === 'user') {
-        recentUserMessages++;
-      }
-      recentSize += estimateTokensForMessage(m2);
-
-      if (canSplitAfter(messages, splitIndex)) {
-        bestN = splitIndex + 1;
-      }
-
-      const reachesMax = recentMessages >= this.config.maxRecentMessages
-        || recentUserMessages >= this.config.maxRecentUserMessages
-        || recentSize >= this.maxSize * this.config.maxRecentSizeRatio;
-      if (reachesMax && bestN !== undefined) {
-        break;
-      }
-    }
-
-    return this.fitCompactCountToWindow(messages, bestN ?? 0);
-  }
-
-  reduceCompactOnOverflow(messages: readonly Message[]): number {
-    const minReducedSize = Math.max(
-      1,
-      Math.ceil(this.maxSize * this.config.minOverflowReductionRatio),
-    );
-    let reducedSize = 0;
-    let bestN: number | undefined;
-
-    for (let i = messages.length - 2; i > 0; i--) {
-      reducedSize += estimateTokensForMessage(messages[i + 1]!);
-      if (canSplitAfter(messages, i)) {
-        bestN = i + 1;
-        if (reducedSize >= minReducedSize) {
-          return i + 1;
-        }
-      }
-    }
-    return bestN ?? messages.length;
-  }
-
-  private fitCompactCountToWindow(
-    messages: readonly Message[],
-    compactedCount: number,
-  ): number {
-    if (this.maxSize <= 0 || compactedCount <= 0) {
-      return compactedCount;
-    }
-
-    let compactedSize = 0;
-    for (let i = 0; i < compactedCount; i++) {
-      compactedSize += estimateTokensForMessage(messages[i]!);
-    }
-    if (compactedSize <= this.maxSize) {
-      return compactedCount;
-    }
-
-    let bestN: number | undefined;
-    for (let n = compactedCount - 1; n > 0; n--) {
-      compactedSize -= estimateTokensForMessage(messages[n]!);
-      if (!canSplitAfter(messages, n - 1)) {
-        continue;
-      }
-      bestN = n;
-      if (compactedSize <= this.maxSize) {
-        return n;
-      }
-    }
-
-    return bestN ?? compactedCount;
-  }
-
   get checkAfterStep(): boolean {
     return this.config.triggerRatio !== this.config.blockRatio;
   }
@@ -178,43 +70,4 @@ export class DefaultCompactionStrategy implements CompactionStrategy {
   }
 }
 
-/**
- * Decide whether a compaction split is safe to place immediately after
- * `messages[index]`. A split is safe only when:
- *   - `messages[index]` itself is not a user message or an assistant message
- *     with pending tool calls (cutting either of those off from what follows
- *     would break the conversation), AND
- *   - the next message is not a tool result. The history is well-formed:
- *     tool results only appear after their owning `asst_w_tc` and all tool
- *     results for one exchange land consecutively before the next non-tool
- *     message. So if the suffix starts with a tool result, its `asst_w_tc`
- *     must be in the compacted prefix, which would orphan that result
- *     (e.g. splitting between tool_a and tool_b of a parallel call), AND
- *   - the compacted prefix itself does not end with an unresolved tool
- *     exchange, because pending tool results must remain in the retained tail.
- */
-function canSplitAfter(messages: readonly Message[], index: number): boolean {
-  const m = messages[index];
-  if (m === undefined) return false;
-  if (m.role === 'user') return false;
-  if (m.role === 'assistant' && m.toolCalls.length > 0) return false;
-  if (messages[index + 1]?.role === 'tool') return false;
-  if (prefixEndsWithOpenToolExchange(messages, index)) return false;
-  return true;
-}
-
-function prefixEndsWithOpenToolExchange(messages: readonly Message[], index: number): boolean {
-  if (messages[index]?.role !== 'tool') return false;
-
-  let toolResultCount = 0;
-  for (let i = index; i >= 0; i--) {
-    const message = messages[i];
-    if (message === undefined) return false;
-    if (message.role === 'tool') {
-      toolResultCount++;
-      continue;
-    }
-    return message.role === 'assistant' && message.toolCalls.length > toolResultCount;
-  }
-  return false;
-}
+export type { CompactionSource };
diff --git a/packages/agent-core/src/agent/context/index.ts b/packages/agent-core/src/agent/context/index.ts
index 88edda53f..5d426aa9b 100644
--- a/packages/agent-core/src/agent/context/index.ts
+++ b/packages/agent-core/src/agent/context/index.ts
@@ -4,7 +4,12 @@ import type { Agent } from '..';
 import { ErrorCodes, KimiError } from '../../errors';
 import type { ExecutableToolResult, LoopRecordedEvent } from '../../loop';
 import { estimateTokensForMessages } from '../../utils/tokens';
-import type { CompactionResult } from '../compaction';
+import {
+  COMPACT_USER_MESSAGE_MAX_TOKENS,
+  collectCompactableUserMessages,
+  selectRecentUserMessages,
+  type CompactionResult,
+} from '../compaction';
 import { project, trimTrailingOpenToolExchange } from './projector';
 import {
   USER_PROMPT_ORIGIN,
@@ -175,17 +180,24 @@ export class ContextMemory {
         tokensAfter: result.tokensAfter,
       },
     });
+    const keptUserMessages = selectRecentUserMessages(
+      collectCompactableUserMessages(this._history),
+      COMPACT_USER_MESSAGE_MAX_TOKENS,
+    );
     this._history = [
+      ...keptUserMessages,
       {
-        role: 'assistant',
+        role: 'user',
         content: [{ type: 'text', text: result.summary }],
         toolCalls: [],
         origin: { kind: 'compaction_summary' },
       },
-      ...this._history.slice(result.compactedCount),
     ];
     this.openSteps.clear();
-    this.flushDeferredMessagesIfToolExchangeClosed();
+    this.pendingToolResultIds.clear();
+    // Drop deferred messages (mostly injections/system reminders) instead of
+    // flushing them: initial context is rebuilt every turn.
+    this.deferredMessages = [];
     this._tokenCount = result.tokensAfter;
     this.tokenCountCoveredMessageCount = this._history.length;
     this.agent.microCompaction.reset();
diff --git a/packages/agent-core/src/services/message/transcript.ts b/packages/agent-core/src/services/message/transcript.ts
index e98bed516..771dc16cc 100644
--- a/packages/agent-core/src/services/message/transcript.ts
+++ b/packages/agent-core/src/services/message/transcript.ts
@@ -45,6 +45,11 @@ import path from 'node:path';
 import type { AgentRecord } from '../../agent/records';
 import type { ContextMessage } from '../../agent/context';
 import type { ExecutableToolResult, LoopRecordedEvent } from '../../loop';
+import {
+  COMPACT_USER_MESSAGE_MAX_TOKENS,
+  collectCompactableUserMessages,
+  selectRecentUserMessages,
+} from '../../agent/compaction';
 
 type ContentPart = ContextMessage['content'][number];
 
@@ -238,20 +243,24 @@ export function reduceWireRecords(records: Iterable<AgentRecord>): {
         applyLoopEvent(record.event, record.time);
         break;
       case 'context.apply_compaction': {
-        // ContextMemory drops history[0..compactedCount] and prepends the
-        // summary; we keep the prefix and insert the summary at the fold
-        // point so the transcript shows both.
-        const tailLength = Math.max(0, foldedLength - record.compactedCount);
-        transcript.splice(Math.max(0, transcript.length - tailLength), 0, {
+        // Mirrors ContextMemory.applyCompaction: the live context becomes the
+        // most recent user messages followed by a user-role summary. The
+        // transcript keeps the full history and appends the summary marker;
+        // foldedLength tracks the post-compaction live context length.
+        const keptUserMessages = selectRecentUserMessages(
+          collectCompactableUserMessages(transcript.map((entry) => entry.message)),
+          COMPACT_USER_MESSAGE_MAX_TOKENS,
+        );
+        transcript.push({
           message: {
-            role: 'assistant',
+            role: 'user',
             content: [{ type: 'text', text: record.summary }],
             toolCalls: [],
             origin: { kind: 'compaction_summary' },
           },
           time: record.time,
         });
-        foldedLength = tailLength + 1;
+        foldedLength = keptUserMessages.length + 1;
         openSteps.clear();
         flushDeferredIfToolExchangeClosed();
         break;
diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts
index 85da0ed46..1703d645b 100644
--- a/packages/agent-core/test/agent/compaction/full.test.ts
+++ b/packages/agent-core/test/agent/compaction/full.test.ts
@@ -17,7 +17,11 @@ import {
 import { afterEach, describe, expect, it, vi } from 'vitest';
 
 import type { AgentOptions } from '../../../src/agent';
-import { DefaultCompactionStrategy, type CompactionStrategy } from '../../../src/agent/compaction';
+import {
+  COMPACTION_SUMMARY_PREFIX,
+  DefaultCompactionStrategy,
+  type CompactionStrategy,
+} from '../../../src/agent/compaction';
 import { FLAG_DEFINITIONS, MASTER_ENV } from '../../../src/flags';
 import { HookEngine, type HookEngineTriggerArgs } from '../../../src/session/hooks';
 import { estimateTokensForMessages } from '../../../src/utils/tokens';
@@ -43,138 +47,6 @@ const CATALOGUED_MODEL_CAPABILITIES = {
 const MICRO_COMPACTION_FLAG_ENV = getMicroCompactionFlagEnv();
 
 describe('FullCompaction', () => {
-  it('keeps an oversized trailing user message as recent', () => {
-    const strategy = testCompactionStrategy();
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', `pending user ${'x'.repeat(1_200)}`),
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('keeps consecutive trailing user messages as recent', () => {
-    const strategy = testCompactionStrategy();
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', `pending user one ${'x'.repeat(1_200)}`),
-      textMessage('user', `pending user two ${'x'.repeat(1_200)}`),
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('compacts the prefix when the trailing exchange itself is oversized', () => {
-    const strategy = testCompactionStrategy();
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', 'recent user'),
-      textMessage('assistant', `recent assistant ${'x'.repeat(1_200)}`),
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('returns 0 when there is nothing to compact', () => {
-    const strategy = testCompactionStrategy();
-    expect(strategy.computeCompactCount([], 'auto')).toBe(0);
-    expect(strategy.computeCompactCount([textMessage('user', 'only pending')], 'auto')).toBe(0);
-    expect(
-      strategy.computeCompactCount(
-        [
-          textMessage('user', 'a'),
-          textMessage('user', 'b'),
-          textMessage('user', 'c'),
-        ],
-        'auto',
-      ),
-    ).toBe(0);
-  });
-
-  it('returns 0 when no intermediate split exists and the last message is also unsplittable', () => {
-    const strategy = testCompactionStrategy();
-    const messages: Message[] = [
-      textMessage('user', 'inspect'),
-      {
-        role: 'assistant',
-        content: [],
-        toolCalls: [{ type: 'function', id: 'call_a', name: 'Lookup', arguments: '{}' }],
-      },
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(0);
-  });
-
-  it('does not split inside a parallel tool exchange', () => {
-    const strategy = testCompactionStrategy();
-    const messages: Message[] = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', 'run both tools'),
-      {
-        role: 'assistant',
-        content: [],
-        toolCalls: [
-          { type: 'function', id: 'call_a', name: 'Lookup', arguments: '{}' },
-          { type: 'function', id: 'call_b', name: 'Lookup', arguments: '{}' },
-        ],
-      },
-      { role: 'tool', content: [{ type: 'text', text: 'a' }], toolCalls: [], toolCallId: 'call_a' },
-      { role: 'tool', content: [{ type: 'text', text: 'b' }], toolCalls: [], toolCallId: 'call_b' },
-      textMessage('user', 'next prompt'),
-    ];
-
-    // The only valid split is before the parallel exchange (after 'old assistant'),
-    // never between tool_a and tool_b — that would leave tool_b as an orphan.
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('reserves response context by default before the ratio threshold is reached', () => {
-    const strategy = new DefaultCompactionStrategy(() => 256_000);
-
-    expect(strategy.shouldCompact(210_000)).toBe(true);
-    expect(strategy.shouldBlock(210_000)).toBe(true);
-  });
-
-  it('backs off overflow compaction by at least five percent of the context window', () => {
-    const strategy = testCompactionStrategy(1_000);
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      ...Array.from({ length: 20 }, () => [
-        textMessage('user', 'continue'),
-        textMessage('assistant', ''),
-      ]).flat(),
-    ];
-
-    const reduced = strategy.reduceCompactOnOverflow(messages);
-    const removed = messages.slice(reduced);
-
-    expect(reduced).toBeGreaterThan(0);
-    expect(estimateTokensForMessages(removed)).toBeGreaterThanOrEqual(50);
-  });
-
-  it('ignores reserved context when the reserve is not smaller than the model window', () => {
-    const strategy = new DefaultCompactionStrategy(() => 32_000, {
-      triggerRatio: 0.85,
-      blockRatio: 0.85,
-      reservedContextSize: 50_000,
-      maxCompactionPerTurn: 3,
-      maxRecentMessages: 3,
-      maxRecentUserMessages: Infinity,
-      maxRecentSizeRatio: 0.2,
-      minOverflowReductionRatio: 0.05,
-    });
-
-    expect(strategy.shouldCompact(1)).toBe(false);
-    expect(strategy.shouldBlock(1)).toBe(false);
-    expect(strategy.shouldCompact(28_000)).toBe(true);
-    expect(strategy.shouldBlock(28_000)).toBe(true);
-  });
-
   it('runs manual compaction and applies the compacted context', async () => {
     const records: TelemetryRecord[] = [];
     const ctx = testAgent({ telemetry: recordingTelemetry(records) });
@@ -203,12 +75,12 @@ describe('FullCompaction', () => {
       [wire] context.append_message     { "message": { "role": "user", "content": [ { "type": "text", "text": "recent user three" } ], "toolCalls": [], "origin": { "kind": "user" } }, "time": "<time>" }
       [wire] full_compaction.begin      { "source": "manual", "instruction": "Keep the important test facts.", "time": "<time>" }
       [emit] compaction.started         { "trigger": "manual", "instruction": "Keep the important test facts." }
-      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 520, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 120, "maxContextTokens": 256000, "contextUsage": 0.00046875, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 520, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 520, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction   { "summary": "Compacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 5, "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 5, "maxContextTokens": 256000, "contextUsage": 0.00001953125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 520, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 520, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 155, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 120, "maxContextTokens": 256000, "contextUsage": 0.00046875, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 155, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 155, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] context.apply_compaction   { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 119, "time": "<time>" }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 119, "maxContextTokens": 256000, "contextUsage": 0.00046484375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 155, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 155, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete   { "time": "<time>" }
-      [emit] compaction.completed       { "result": { "summary": "Compacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 5 } }
+      [emit] compaction.completed       { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 119 } }
     `);
     expect(ctx.lastLlmInput()).toMatchInlineSnapshot(`
       system: <system-prompt>
@@ -220,13 +92,26 @@ describe('FullCompaction', () => {
         assistant: text "old assistant two"
         user: text "recent user three"
         assistant: text "recent assistant three"
-        user: text <compaction-instruction>
+        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nKeep the important test facts."
     `);
     expect(ctx.compactHistory()).toMatchInlineSnapshot(`
       [
         {
-          "role": "assistant",
-          "text": "Compacted summary.",
+          "role": "user",
+          "text": "old user one",
+        },
+        {
+          "role": "user",
+          "text": "old user two",
+        },
+        {
+          "role": "user",
+          "text": "recent user three",
+        },
+        {
+          "role": "user",
+          "text": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:
+      Compacted summary.",
         },
       ]
     `);
@@ -236,12 +121,12 @@ describe('FullCompaction', () => {
         source: 'manual',
         instruction: 'Keep the important test facts.',
         tokensBefore: 39,
-        tokensAfter: 5,
+        tokensAfter: 119,
         duration: expect.any(Number),
         compactedCount: 6,
         retryCount: 0,
         thinkingLevel: 'off',
-        inputOther: 520,
+        inputOther: 155,
         output: 8,
         inputCacheRead: 0,
         inputCacheCreation: 0,
@@ -387,7 +272,9 @@ describe('FullCompaction', () => {
     expect(authKeys).toEqual(['fresh-token', 'forced-refresh-token', 'fresh-token']);
     expect(tokenCalls).toEqual([undefined, true, undefined]);
     expect(ctx.compactHistory()).toEqual([
-      { role: 'assistant', text: 'Recovered compacted summary.' },
+      { role: 'user', text: 'old user one' },
+      { role: 'user', text: 'recent user two' },
+      { role: 'user', text: `${COMPACTION_SUMMARY_PREFIX}\nRecovered compacted summary.` },
     ]);
     await ctx.expectResumeMatches();
   });
@@ -549,20 +436,22 @@ describe('FullCompaction', () => {
     await completed;
 
     expect(attempts).toBe(3);
-    // Each empty summary shrinks the compacted prefix before retrying, so the
-    // recovered summary compacts only the older exchange and leaves the recent
-    // one in history.
+    // Empty summaries are retried without shrinking the history; the recovered
+    // summary replaces the whole history with the real user messages plus the
+    // prefixed summary.
     expect(ctx.compactHistory()).toEqual([
-      { role: 'assistant', text: 'Recovered compacted summary.' },
+      { role: 'user', text: 'old user one' },
       { role: 'user', text: 'recent user two' },
-      { role: 'assistant', text: 'recent assistant two' },
+      { role: 'user', text: `${COMPACTION_SUMMARY_PREFIX}\nRecovered compacted summary.` },
     ]);
     expect(
       ctx.allEvents.filter((event) => event.event === 'compaction.completed'),
     ).toEqual([
       expect.objectContaining({
         args: expect.objectContaining({
-          result: expect.objectContaining({ summary: 'Recovered compacted summary.' }),
+          result: expect.objectContaining({
+            summary: expect.stringContaining('Recovered compacted summary.'),
+          }),
         }),
       }),
     ]);
@@ -605,12 +494,12 @@ describe('FullCompaction', () => {
     await completed;
 
     expect(inputs).toHaveLength(2);
-    // The retry compacts a strictly smaller prefix than the first attempt.
+    // The retry sends a strictly smaller input than the first attempt.
     expect(inputs[1]!.length).toBeLessThan(inputs[0]!.length);
     expect(ctx.compactHistory()).toEqual([
-      { role: 'assistant', text: 'Recovered compacted summary.' },
+      { role: 'user', text: 'old user one' },
       { role: 'user', text: 'recent user two' },
-      { role: 'assistant', text: 'recent assistant two' },
+      { role: 'user', text: `${COMPACTION_SUMMARY_PREFIX}\nRecovered compacted summary.` },
     ]);
     await ctx.expectResumeMatches();
   });
@@ -642,8 +531,10 @@ describe('FullCompaction', () => {
     await vi.advanceTimersByTimeAsync(60_000);
     await failed;
 
-    // MAX_COMPACTION_RETRY_ATTEMPTS attempts, with prefix reduction between them.
-    expect(inputs).toHaveLength(5);
+    // Each empty/think-only response drops the oldest item and resets the retry
+    // counter; once only one item remains, MAX_COMPACTION_RETRY_ATTEMPTS more
+    // retries run before failing. 3 drops + 5 retries = 8 generate calls.
+    expect(inputs).toHaveLength(8);
     expect(inputs[1]!.length).toBeLessThan(inputs[0]!.length);
     expect(records).toContainEqual({
       event: 'compaction_failed',
@@ -833,7 +724,9 @@ describe('FullCompaction', () => {
     await vi.advanceTimersByTimeAsync(60_000);
     const events = await ctx.untilTurnEnd();
 
-    expect(attempts).toBe(5);
+    // A single-item history cannot be shrunk further, so the truncated response
+    // fails immediately instead of looping through retries.
+    expect(attempts).toBe(1);
     expect(events).toContainEqual(
       expect.objectContaining({
         event: 'turn.ended',
@@ -931,13 +824,18 @@ describe('FullCompaction', () => {
       messages:
         user: text "old user one"
         assistant: text "old assistant one"
-        user: text <compaction-instruction>
+        user: text "run both tools"
+        assistant: []  calls call_open_one:LookupOne { "query": "one" }, call_open_two:LookupTwo { "query": "two" }
+        tool[call_open_one]: text "one result"
+        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nKeep stable facts."
     `);
+    // The unresolved tool exchange is sent to the model (see the compaction input
+    // above) but is dropped from the replacement history, leaving only the real
+    // user messages followed by the compaction summary.
     expect(ctx.agent.context.history.map((message) => message.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
+      'user',
+      'user',
     ]);
     ctx.dispatch({
       type: 'context.append_loop_event',
@@ -949,11 +847,9 @@ describe('FullCompaction', () => {
       },
     });
     expect(ctx.agent.context.history.map((message) => message.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
-      'tool',
+      'user',
+      'user',
     ]);
     await ctx.expectResumeMatches();
   });
@@ -981,12 +877,12 @@ describe('FullCompaction', () => {
       [wire] full_compaction.begin      { "source": "manual", "time": "<time>" }
       [emit] compaction.started         { "trigger": "manual" }
       [wire] context.append_message     { "message": { "role": "user", "content": [ { "type": "text", "text": "new user while compacting" } ], "toolCalls": [], "origin": { "kind": "user" } }, "time": "<time>" }
-      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 499, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 80, "maxContextTokens": 256000, "contextUsage": 0.0003125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 499, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 499, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction   { "summary": "Compacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 5, "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 5, "maxContextTokens": 256000, "contextUsage": 0.00001953125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 499, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 499, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 133, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 80, "maxContextTokens": 256000, "contextUsage": 0.0003125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 133, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 133, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] context.apply_compaction   { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 114, "time": "<time>" }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 114, "maxContextTokens": 256000, "contextUsage": 0.0004453125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 133, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 133, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete   { "time": "<time>" }
-      [emit] compaction.completed       { "result": { "summary": "Compacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 5 } }
+      [emit] compaction.completed       { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 114 } }
     `);
     expect(ctx.lastLlmInput()).toMatchInlineSnapshot(`
       system: <system-prompt>
@@ -996,116 +892,32 @@ describe('FullCompaction', () => {
         assistant: text "old assistant one"
         user: text "recent user two"
         assistant: text "recent assistant two"
-        user: text <compaction-instruction>
+        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work."
     `);
     expect(ctx.compactHistory()).toMatchInlineSnapshot(`
       [
         {
-          "role": "assistant",
-          "text": "Compacted prefix.",
+          "role": "user",
+          "text": "old user one",
+        },
+        {
+          "role": "user",
+          "text": "recent user two",
         },
         {
           "role": "user",
           "text": "new user while compacting",
         },
+        {
+          "role": "user",
+          "text": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:
+      Compacted prefix.",
+        },
       ]
     `);
     await ctx.expectResumeMatches();
   });
 
-  it('continues a manual compaction run when the first pass still exceeds the trigger', async () => {
-    const ctx = testAgent();
-    ctx.configure({
-      provider: CATALOGUED_PROVIDER,
-      modelCapabilities: {
-        ...CATALOGUED_MODEL_CAPABILITIES,
-        max_context_tokens: 4_000,
-      },
-    });
-    ctx.appendExchange(
-      1,
-      `old user one ${'u'.repeat(14_000)}`,
-      `old assistant one ${'a'.repeat(14_000)}`,
-      6_000,
-    );
-    const firstSummary = `large manual summary ${'x'.repeat(14_000)}`;
-    let appliedCount = 0;
-    const secondCompacted = new Promise<void>((resolve) => {
-      const handler = () => {
-        appliedCount += 1;
-        if (appliedCount === 2) {
-          ctx.emitter.off('context.apply_compaction', handler);
-          resolve();
-        }
-      };
-      ctx.emitter.on('context.apply_compaction', handler);
-    });
-
-    ctx.mockNextResponse({ type: 'text', text: firstSummary });
-    ctx.mockNextResponse({ type: 'text', text: 'Second manual summary.' });
-    const completed = ctx.once('compaction.completed');
-    await ctx.rpc.beginCompaction({});
-    ctx.appendExchange(2, 'new user while compacting', 'new assistant while compacting', 6_000);
-    await secondCompacted;
-    await completed;
-
-    const events = ctx.newEvents();
-    expect(countEvents(events, 'context.apply_compaction')).toBe(2);
-    expect(countEvents(events, 'compaction.started')).toBe(1);
-    expect(countEvents(events, 'compaction.completed')).toBe(1);
-    expect(ctx.llmCalls).toHaveLength(2);
-    const [firstCompactionCall, secondCompactionCall] = ctx.llmCalls;
-    expect(firstCompactionCall?.history.map(messageText)).not.toContain('new user while compacting');
-    expect(secondCompactionCall?.history.map(messageText)).toContain(firstSummary);
-    expect(secondCompactionCall?.history.map(messageText)).toContain('new user while compacting');
-    expect(secondCompactionCall?.history.map(messageText)).toContain('new assistant while compacting');
-    expect(ctx.compactHistory()).toEqual([
-      {
-        role: 'assistant',
-        text: 'Second manual summary.',
-      },
-    ]);
-    await ctx.expectResumeMatches();
-  });
-
-  it('auto-compacts very large context in window-sized rounds', async () => {
-    const maxContextTokens = 4_000;
-    const ctx = testAgent();
-    ctx.configure({
-      provider: CATALOGUED_PROVIDER,
-      modelCapabilities: {
-        ...CATALOGUED_MODEL_CAPABILITIES,
-        max_context_tokens: maxContextTokens,
-      },
-    });
-    for (let i = 1; i <= 22; i++) {
-      ctx.appendAssistantTextWithUsage(
-        i,
-        `history chunk ${String(i)} ${'x'.repeat(7_200)}`,
-        i * 1_850,
-      );
-    }
-    const initialTokens = estimateTokensForMessages(ctx.agent.context.history);
-    const completed = ctx.once('compaction.completed');
-    for (let i = 1; i <= 30; i++) {
-      ctx.mockNextResponse({ type: 'text', text: `Auto summary ${String(i)}.` });
-    }
-
-    ctx.agent.fullCompaction.begin({ source: 'auto', instruction: undefined });
-    await completed;
-
-    const events = ctx.newEvents();
-    const compactedPrefixSizes = ctx.llmCalls.map((call) =>
-      estimateTokensForMessages(call.history.slice(0, -1)),
-    );
-    expect(initialTokens).toBeGreaterThan(maxContextTokens * 9);
-    expect(countEvents(events, 'context.apply_compaction')).toBeGreaterThan(1);
-    expect(countEvents(events, 'compaction.completed')).toBe(1);
-    expect(compactedPrefixSizes.length).toBeGreaterThan(1);
-    expect(compactedPrefixSizes.every((size) => size <= maxContextTokens)).toBe(true);
-    expect(ctx.agent.context.tokenCount).toBeLessThan(maxContextTokens * 0.85);
-    await ctx.expectResumeMatches();
-  });
 
   it('cancels when the compacted prefix changes before completion', async () => {
     const ctx = testAgent();
@@ -1129,8 +941,8 @@ describe('FullCompaction', () => {
       [emit] compaction.started       { "trigger": "manual" }
       [wire] context.clear            { "time": "<time>" }
       [emit] agent.status.updated     { "model": "kimi-code", "contextTokens": 0, "maxContextTokens": 256000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual" }
-      [wire] usage.record             { "model": "kimi-code", "usage": { "inputOther": 499, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated     { "model": "kimi-code", "contextTokens": 0, "maxContextTokens": 256000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 499, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 499, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record             { "model": "kimi-code", "usage": { "inputOther": 133, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated     { "model": "kimi-code", "contextTokens": 0, "maxContextTokens": 256000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 133, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 133, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.cancel   { "time": "<time>" }
       [emit] compaction.cancelled     {}
     `);
@@ -1142,7 +954,7 @@ describe('FullCompaction', () => {
         assistant: text "old assistant one"
         user: text "recent user two"
         assistant: text "recent assistant two"
-        user: text <compaction-instruction>
+        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work."
     `);
     expect(ctx.compactHistory()).toMatchInlineSnapshot(`[]`);
     await ctx.expectResumeMatches();
@@ -1173,20 +985,20 @@ describe('FullCompaction', () => {
       [wire] full_compaction.begin       { "source": "auto", "time": "<time>" }
       [emit] compaction.started          { "trigger": "auto" }
       [emit] compaction.blocked          { "turnId": 0 }
-      [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 498, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 950000, "maxContextTokens": 256000, "contextUsage": 3.7109375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 498, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 498, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction    { "summary": "Auto compacted summary.", "compactedCount": 4, "tokensBefore": 46, "tokensAfter": 28, "time": "<time>" }
-      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 28, "maxContextTokens": 256000, "contextUsage": 0.000109375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 498, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 498, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 154, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 950000, "maxContextTokens": 256000, "contextUsage": 3.7109375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 154, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 154, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] context.apply_compaction    { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nAuto compacted summary.", "compactedCount": 7, "tokensBefore": 46, "tokensAfter": 127, "time": "<time>" }
+      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 127, "maxContextTokens": 256000, "contextUsage": 0.00049609375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 154, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 154, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete    { "time": "<time>" }
-      [emit] compaction.completed        { "result": { "summary": "Auto compacted summary.", "compactedCount": 4, "tokensBefore": 46, "tokensAfter": 28 } }
+      [emit] compaction.completed        { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nAuto compacted summary.", "compactedCount": 7, "tokensBefore": 46, "tokensAfter": 127 } }
       [wire] context.append_loop_event   { "event": { "type": "step.begin", "uuid": "<uuid-1>", "turnId": "0", "step": 1 }, "time": "<time>" }
       [emit] turn.step.started           { "turnId": 0, "step": 1, "stepId": "<uuid-1>" }
       [emit] assistant.delta             { "turnId": 0, "delta": "I can answer after compaction." }
       [wire] context.append_loop_event   { "event": { "type": "content.part", "uuid": "<uuid-2>", "turnId": "0", "step": 1, "stepUuid": "<uuid-1>", "part": { "type": "text", "text": "I can answer after compaction." } }, "time": "<time>" }
-      [wire] context.append_loop_event   { "event": { "type": "step.end", "uuid": "<uuid-1>", "turnId": "0", "step": 1, "usage": { "inputOther": 31, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "end_turn" }, "time": "<time>" }
-      [emit] turn.step.completed         { "turnId": 0, "step": 1, "stepId": "<uuid-1>", "usage": { "inputOther": 31, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "end_turn" }
-      [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 31, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "turn", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 42, "maxContextTokens": 256000, "contextUsage": 0.0001640625, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 529, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 529, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 31, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] context.append_loop_event   { "event": { "type": "step.end", "uuid": "<uuid-1>", "turnId": "0", "step": 1, "usage": { "inputOther": 126, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "end_turn" }, "time": "<time>" }
+      [emit] turn.step.completed         { "turnId": 0, "step": 1, "stepId": "<uuid-1>", "usage": { "inputOther": 126, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "end_turn" }
+      [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 126, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "turn", "time": "<time>" }
+      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 137, "maxContextTokens": 256000, "contextUsage": 0.00053515625, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 280, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 280, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 126, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [emit] turn.ended                  { "turnId": 0, "reason": "completed" }
     `);
     expect(ctx.llmInputs()).toMatchInlineSnapshot(`
@@ -1198,22 +1010,23 @@ describe('FullCompaction', () => {
           assistant: text "old assistant one"
           user: text "old user two"
           assistant: text "old assistant two"
-          user: text <compaction-instruction>
-
-      call 2:
-        messages:
-          assistant: text "Auto compacted summary."
           user: text "recent user three"
           assistant: text "recent assistant three"
           user: text "Answer after compacting"
+          user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work."
+
+      call 2:
+        messages:
+          user: text "old user one\\n\\nold user two\\n\\nrecent user three\\n\\nAnswer after compacting"
+          user: text "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nAuto compacted summary."
     `);
     expect(records).toContainEqual({
       event: 'compaction_finished',
       properties: expect.objectContaining({
         source: 'auto',
         tokensBefore: 46,
-        tokensAfter: 28,
-        compactedCount: 4,
+        tokensAfter: 127,
+        compactedCount: 7,
         retryCount: 0,
       }),
     });
@@ -1246,15 +1059,18 @@ describe('FullCompaction', () => {
     await ctx.rpc.beginCompaction({});
     await compacted;
 
-    // Compaction preserves the in-flight tool exchange in recent; the deferred
-    // reminder still cannot land because the tool exchange is still open.
+    // Compaction drops the in-flight tool exchange and the deferred reminder
+    // (initial context is rebuilt every turn); only real user messages and
+    // the compaction summary remain.
     expect(ctx.agent.context.history.map((m) => m.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
+      'user',
+      'user',
     ]);
+    expect(ctx.agent.context.history.at(-1)?.origin).toEqual({ kind: 'compaction_summary' });
 
-    // Closing the exchange flushes the deferred reminder to history.
+    // The dropped tool calls no longer exist, so late tool results are orphans
+    // and do not change history.
     ctx.dispatch({
       type: 'context.append_loop_event',
       event: {
@@ -1275,15 +1091,9 @@ describe('FullCompaction', () => {
     });
 
     expect(ctx.agent.context.history.map((m) => m.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
-      'tool',
       'user',
-    ]);
-    expect(ctx.agent.context.history.at(-1)?.content).toEqual([
-      { type: 'text', text: '<system-reminder>\nhost note\n</system-reminder>' },
+      'user',
     ]);
   });
 
@@ -1314,13 +1124,18 @@ describe('FullCompaction', () => {
     await ctx.rpc.beginCompaction({});
     await compacted;
 
+    // Compaction drops the partially-resolved tool exchange and the deferred
+    // reminder (initial context is rebuilt every turn); only real user
+    // messages and the compaction summary remain.
     expect(ctx.agent.context.history.map((m) => m.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
+      'user',
+      'user',
     ]);
+    expect(ctx.agent.context.history.at(-1)?.origin).toEqual({ kind: 'compaction_summary' });
 
+    // The dropped tool calls no longer exist, so a late tool result is an orphan
+    // and does not change history.
     ctx.dispatch({
       type: 'context.append_loop_event',
       event: {
@@ -1332,73 +1147,44 @@ describe('FullCompaction', () => {
     });
 
     expect(ctx.agent.context.history.map((m) => m.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
-      'tool',
       'user',
-    ]);
-    expect(ctx.agent.context.history.at(-1)?.content).toEqual([
-      { type: 'text', text: '<system-reminder>\nhost note\n</system-reminder>' },
+      'user',
     ]);
   });
 
-  it('fails the turn with compaction.unable when auto compaction has no compactable prefix', async () => {
+  it('rejects manual compaction with compaction.unable when history is empty', async () => {
     const ctx = testAgent();
     ctx.configure({
       provider: CATALOGUED_PROVIDER,
-      modelCapabilities: {
-        ...CATALOGUED_MODEL_CAPABILITIES,
-        max_context_tokens: 2_000,
-      },
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
     });
-    const oversizedPrompt = `initial-pending-verbatim:${'x'.repeat(8_000)}`;
-
-    await ctx.rpc.prompt({ input: [{ type: 'text', text: oversizedPrompt }] });
-    const events = await ctx.untilTurnEnd();
 
-    expect(eventIndex(events, 'compaction.started')).toBe(-1);
+    await expect(ctx.rpc.beginCompaction({})).rejects.toMatchObject({
+      code: 'compaction.unable',
+    });
     expect(ctx.llmCalls).toHaveLength(0);
-    expect(events).toContainEqual(
-      expect.objectContaining({
-        event: 'turn.ended',
-        args: expect.objectContaining({
-          reason: 'failed',
-          error: expect.objectContaining({ code: 'compaction.unable' }),
-        }),
-      }),
-    );
-    await ctx.expectResumeMatches();
   });
 
-  it('rejects manual compaction with compaction.unable when no prefix is compactable', async () => {
+  it('compacts a single user message and keeps it ahead of the summary', async () => {
     const ctx = testAgent();
     ctx.configure({
       provider: CATALOGUED_PROVIDER,
       modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
     });
     ctx.agent.context.appendUserMessage([{ type: 'text', text: 'only pending user' }]);
-
-    await expect(ctx.rpc.beginCompaction({})).rejects.toMatchObject({
-      code: 'compaction.unable',
-    });
-    expect(ctx.llmCalls).toHaveLength(0);
-
-    ctx.agent.context.clear();
-    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
-    ctx.appendExchange(2, 'recent user two', 'recent assistant two', 80);
     const compacted = ctx.once('context.apply_compaction');
     const completed = ctx.once('compaction.completed');
 
-    ctx.mockNextResponse({ type: 'text', text: 'Compacted after no-op cancel.' });
+    ctx.mockNextResponse({ type: 'text', text: 'Single message summary.' });
     await ctx.rpc.beginCompaction({});
     await compacted;
     await completed;
 
     expect(ctx.llmCalls).toHaveLength(1);
     expect(ctx.compactHistory()).toEqual([
-      { role: 'assistant', text: 'Compacted after no-op cancel.' },
+      { role: 'user', text: 'only pending user' },
+      { role: 'user', text: `${COMPACTION_SUMMARY_PREFIX}\nSingle message summary.` },
     ]);
     await ctx.expectResumeMatches();
   });
@@ -1453,8 +1239,10 @@ describe('FullCompaction', () => {
 
     expect(ctx.llmCalls).toHaveLength(2);
     const [compactionCall, answerCall] = ctx.llmCalls;
-    expect(messageText(compactionCall?.history.at(-1))).toContain('<!-- Compression Priorities');
-    expect(answerCall?.history.map(messageText)).toContain('Reserved compacted summary.');
+    expect(messageText(compactionCall?.history.at(-1))).toContain('CONTEXT CHECKPOINT COMPACTION');
+    expect(
+      answerCall?.history.map(messageText).some((text) => text.includes('Reserved compacted summary.')),
+    ).toBe(true);
     await ctx.expectResumeMatches();
   });
 
@@ -1478,10 +1266,21 @@ describe('FullCompaction', () => {
     expect(ctx.llmCalls).toHaveLength(2);
     const [compactionCall, answerCall] = ctx.llmCalls;
     const compactionTexts = compactionCall?.history.map(messageText) ?? [];
-    expect(compactionTexts.some((text) => text.includes('keep-this-pending-verbatim'))).toBe(false);
-    expect(compactionCall?.history.map((message) => message.role)).toEqual(['user', 'assistant', 'user']);
-    expect(answerCall?.history.map(messageText)).toContain('Oversized prompt summary.');
-    expect(messageText(answerCall?.history.at(-1))).toBe(oversizedPrompt);
+    // The whole history is compacted, so the pending prompt is included in the
+    // compaction input and kept verbatim in the post-compaction replacement.
+    expect(compactionTexts.some((text) => text.includes('keep-this-pending-verbatim'))).toBe(true);
+    expect(compactionCall?.history.map((message) => message.role)).toEqual([
+      'user',
+      'assistant',
+      'user',
+      'user',
+    ]);
+    expect(
+      answerCall?.history.map(messageText).some((text) => text.includes('Oversized prompt summary.')),
+    ).toBe(true);
+    expect(
+      answerCall?.history.map(messageText).some((text) => text.includes('keep-this-pending-verbatim')),
+    ).toBe(true);
     await ctx.expectResumeMatches();
   });
 
@@ -1494,7 +1293,9 @@ describe('FullCompaction', () => {
         max_context_tokens: 1_000_000,
       },
     });
-    ctx.appendExchange(1, 'old user one', 'old assistant one', 840_000);
+    // The auto-compact ratio moved from 0.85 to 0.9, so the context must sit
+    // above 90% of the 1_000_000 window (plus pending tokens) to trigger.
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 890_000);
     const pendingPrompt = `ratio-pending-verbatim:${'x'.repeat(60_000)}`;
 
     ctx.mockNextResponse({ type: 'text', text: 'Ratio compacted summary.' });
@@ -1505,10 +1306,21 @@ describe('FullCompaction', () => {
     expect(ctx.llmCalls).toHaveLength(2);
     const [compactionCall, answerCall] = ctx.llmCalls;
     const compactionTexts = compactionCall?.history.map(messageText) ?? [];
-    expect(compactionTexts.some((text) => text.includes('ratio-pending-verbatim'))).toBe(false);
-    expect(compactionCall?.history.map((message) => message.role)).toEqual(['user', 'assistant', 'user']);
-    expect(answerCall?.history.map(messageText)).toContain('Ratio compacted summary.');
-    expect(messageText(answerCall?.history.at(-1))).toBe(pendingPrompt);
+    // The whole history is compacted, so the pending prompt is included in the
+    // compaction input and kept verbatim in the post-compaction replacement.
+    expect(compactionTexts.some((text) => text.includes('ratio-pending-verbatim'))).toBe(true);
+    expect(compactionCall?.history.map((message) => message.role)).toEqual([
+      'user',
+      'assistant',
+      'user',
+      'user',
+    ]);
+    expect(
+      answerCall?.history.map(messageText).some((text) => text.includes('Ratio compacted summary.')),
+    ).toBe(true);
+    expect(
+      answerCall?.history.map(messageText).some((text) => text.includes('ratio-pending-verbatim')),
+    ).toBe(true);
 
     await ctx.expectResumeMatches();
   });
@@ -1556,8 +1368,8 @@ describe('FullCompaction', () => {
       expect.objectContaining({
         event: 'context.apply_compaction',
         args: expect.objectContaining({
-          summary: 'Overflow compacted summary.',
-          compactedCount: 2,
+          summary: expect.stringContaining('Overflow compacted summary.'),
+          compactedCount: 4,
         }),
       }),
     );
@@ -1577,11 +1389,15 @@ describe('FullCompaction', () => {
         [
           "user: old user one",
           "assistant: old assistant one",
+          "user: Retry after provider overflow",
           "user: <compaction-instruction>",
         ],
         [
-          "assistant: Overflow compacted summary.",
-          "user: Retry after provider overflow",
+          "user: old user one
+
+      Retry after provider overflow",
+          "user: Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:
+      Overflow compacted summary.",
         ],
       ]
     `);
@@ -1689,8 +1505,8 @@ describe('FullCompaction', () => {
       expect.objectContaining({
         event: 'context.apply_compaction',
         args: expect.objectContaining({
-          summary: 'Unknown window compacted summary.',
-          compactedCount: 2,
+          summary: expect.stringContaining('Unknown window compacted summary.'),
+          compactedCount: 4,
         }),
       }),
     );
@@ -1822,8 +1638,8 @@ describe('FullCompaction', () => {
       expect.objectContaining({
         event: 'context.apply_compaction',
         args: expect.objectContaining({
-          summary: 'Placeholder compacted summary.',
-          compactedCount: 2,
+          summary: expect.stringContaining('Placeholder compacted summary.'),
+          compactedCount: 4,
         }),
       }),
     );
@@ -1850,12 +1666,12 @@ describe('FullCompaction', () => {
       [wire] full_compaction.begin       { "source": "auto", "time": "<time>" }
       [emit] compaction.started          { "trigger": "auto" }
       [emit] compaction.blocked          { "turnId": 0 }
-      [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 482, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 0, "maxContextTokens": 1000000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 482, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 482, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction    { "summary": "First compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 6, "time": "<time>" }
-      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 6, "maxContextTokens": 1000000, "contextUsage": 0.000006, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 482, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 482, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 116, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 0, "maxContextTokens": 1000000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 116, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 116, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] context.apply_compaction    { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nFirst compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 114, "time": "<time>" }
+      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 114, "maxContextTokens": 1000000, "contextUsage": 0.000114, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 116, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 116, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete    { "time": "<time>" }
-      [emit] compaction.completed        { "result": { "summary": "First compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 6 } }
+      [emit] compaction.completed        { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nFirst compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 114 } }
       [wire] context.append_loop_event   { "event": { "type": "step.begin", "uuid": "<uuid-1>", "turnId": "0", "step": 1 }, "time": "<time>" }
       [emit] turn.step.started           { "turnId": 0, "step": 1, "stepId": "<uuid-1>" }
       [emit] assistant.delta             { "turnId": 0, "delta": "I need a tool." }
@@ -1865,10 +1681,10 @@ describe('FullCompaction', () => {
       [emit] tool.call.started           { "turnId": 0, "toolCallId": "call_missing", "name": "MissingTool", "args": {} }
       [wire] context.append_loop_event   { "event": { "type": "tool.result", "parentUuid": "call_missing", "toolCallId": "call_missing", "result": { "output": "Tool \\"MissingTool\\" not found", "isError": true } }, "time": "<time>" }
       [emit] tool.result                 { "turnId": 0, "toolCallId": "call_missing", "output": "Tool \\"MissingTool\\" not found", "isError": true }
-      [wire] context.append_loop_event   { "event": { "type": "step.end", "uuid": "<uuid-1>", "turnId": "0", "step": 1, "usage": { "inputOther": 9, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "tool_use" }, "time": "<time>" }
-      [emit] turn.step.completed         { "turnId": 0, "step": 1, "stepId": "<uuid-1>", "usage": { "inputOther": 9, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "tool_use" }
-      [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 9, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "turn", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 20, "maxContextTokens": 1000000, "contextUsage": 0.00002, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 491, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 491, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 9, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] context.append_loop_event   { "event": { "type": "step.end", "uuid": "<uuid-1>", "turnId": "0", "step": 1, "usage": { "inputOther": 115, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "tool_use" }, "time": "<time>" }
+      [emit] turn.step.completed         { "turnId": 0, "step": 1, "stepId": "<uuid-1>", "usage": { "inputOther": 115, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "tool_use" }
+      [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 115, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "turn", "time": "<time>" }
+      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 126, "maxContextTokens": 1000000, "contextUsage": 0.000126, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 231, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 231, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 115, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [emit] turn.step.interrupted       { "turnId": 0, "step": 2, "reason": "error", "message": "Compaction limit exceeded (1)" }
       [emit] turn.ended                  { "turnId": 0, "reason": "failed", "error": { "code": "context.overflow", "message": "Compaction limit exceeded (1)", "name": "KimiError", "details": { "maxCompactions": 1, "turnId": 0 }, "retryable": true } }
     `);
@@ -1881,49 +1697,16 @@ describe('FullCompaction', () => {
         tools: []
         messages:
           user: text "Trigger repeated compaction"
-          user: text <compaction-instruction>
+          user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work."
 
       call 2:
         messages:
-          assistant: text "First compacted summary."
+          user: text "Trigger repeated compaction"
+          user: text "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nFirst compacted summary."
     `);
     await ctx.expectResumeMatches();
   });
 
-  it('appends the todo list to the compaction summary', async () => {
-    const ctx = testAgent();
-    ctx.configure({
-      provider: CATALOGUED_PROVIDER,
-      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
-    });
-    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
-    ctx.appendExchange(2, 'recent user two', 'recent assistant two', 80);
-
-    ctx.agent.tools.updateStore('todo', [
-      { title: 'Fix the auth bug', status: 'in_progress' },
-      { title: 'Add tests', status: 'pending' },
-    ]);
-
-    const compacted = new Promise<void>((resolve) => {
-      ctx.emitter.once('context.apply_compaction', () => {
-        resolve();
-      });
-    });
-    const completed = ctx.once('compaction.completed');
-
-    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' });
-    await ctx.rpc.beginCompaction({});
-    await compacted;
-    await completed;
-
-    const history = ctx.compactHistory();
-    expect(history).toHaveLength(1);
-    expect(history[0]).toMatchObject({
-      role: 'assistant',
-      text: 'Compacted summary.\n\n## TODO List\n  [in_progress] Fix the auth bug\n  [pending] Add tests',
-    });
-    await ctx.expectResumeMatches();
-  });
 });
 
 afterEach(() => {
@@ -2066,8 +1849,6 @@ function realKosongGenerate(
 const alwaysCompactOnce: CompactionStrategy = {
   shouldCompact: () => true,
   shouldBlock: () => true,
-  computeCompactCount: (messages: readonly Message[]) => messages.length,
-  reduceCompactOnOverflow: (messages: readonly Message[]) => messages.length,
   checkAfterStep: true,
   maxCompactionPerTurn: 1,
 };
@@ -2081,29 +1862,12 @@ function missingToolCall(): ToolCall {
   };
 }
 
-function testCompactionStrategy(maxSize: number = 1_000): DefaultCompactionStrategy {
-  return new DefaultCompactionStrategy(() => maxSize, {
-    triggerRatio: 0.85,
-    blockRatio: 0.85,
-    reservedContextSize: 0,
-    maxCompactionPerTurn: 3,
-    maxRecentMessages: 10,
-    maxRecentUserMessages: Infinity,
-    maxRecentSizeRatio: 0.2,
-    minOverflowReductionRatio: 0.05,
-  });
-}
-
 function overflowOnlyCompactionStrategy(maxSize: number = 14): DefaultCompactionStrategy {
   return new DefaultCompactionStrategy(() => maxSize, {
     triggerRatio: Infinity,
     blockRatio: Infinity,
     reservedContextSize: 0,
     maxCompactionPerTurn: 3,
-    maxRecentMessages: 3,
-    maxRecentUserMessages: Infinity,
-    maxRecentSizeRatio: 0.2,
-    minOverflowReductionRatio: 0.05,
   });
 }
 
@@ -2148,5 +1912,5 @@ function inputHistorySnapshot(history: readonly Message[]): string[] {
 }
 
 function normalizeInputText(text: string): string {
-  return text.includes('compact this conversation context') ? '<compaction-instruction>' : text;
+  return text.includes('CONTEXT CHECKPOINT COMPACTION') ? '<compaction-instruction>' : text;
 }
diff --git a/packages/agent-core/test/agent/compaction/memento.test.ts b/packages/agent-core/test/agent/compaction/memento.test.ts
new file mode 100644
index 000000000..15e6cb780
--- /dev/null
+++ b/packages/agent-core/test/agent/compaction/memento.test.ts
@@ -0,0 +1,99 @@
+import type { Message } from '@moonshot-ai/kosong';
+import { describe, expect, it } from 'vitest';
+
+import {
+  COMPACTION_SUMMARY_PREFIX,
+  buildCompactionSummaryText,
+  collectCompactableUserMessages,
+  isCompactionSummaryMessage,
+  selectRecentUserMessages,
+} from '../../../src/agent/compaction';
+import { estimateTokens, estimateTokensForMessage } from '../../../src/utils/tokens';
+
+function textMessage(role: 'user' | 'assistant' | 'tool', text: string): Message {
+  return { role, content: [{ type: 'text', text }], toolCalls: [] };
+}
+
+function messageText(message: Message): string {
+  return message.content.map((part) => (part.type === 'text' ? part.text : '')).join('');
+}
+
+describe('isCompactionSummaryMessage', () => {
+  it('detects the compaction origin', () => {
+    const message = {
+      ...textMessage('user', 'anything'),
+      origin: { kind: 'compaction_summary' as const },
+    };
+    expect(isCompactionSummaryMessage(message)).toBe(true);
+  });
+
+  it('detects the summary prefix', () => {
+    expect(isCompactionSummaryMessage(textMessage('user', `${COMPACTION_SUMMARY_PREFIX}\nsummary`))).toBe(true);
+  });
+
+  it('ignores ordinary user messages', () => {
+    expect(isCompactionSummaryMessage(textMessage('user', 'hello'))).toBe(false);
+  });
+});
+
+describe('collectCompactableUserMessages', () => {
+  it('keeps only user messages', () => {
+    const messages = [
+      textMessage('user', 'u1'),
+      textMessage('assistant', 'a1'),
+      textMessage('tool', 't1'),
+      textMessage('user', 'u2'),
+    ];
+
+    expect(collectCompactableUserMessages(messages).map(messageText)).toEqual(['u1', 'u2']);
+  });
+
+  it('drops previous compaction summaries', () => {
+    const summary = {
+      ...textMessage('user', `${COMPACTION_SUMMARY_PREFIX}\nold summary`),
+      origin: { kind: 'compaction_summary' as const },
+    };
+    const messages = [textMessage('user', 'u1'), summary, textMessage('user', 'u2')];
+
+    expect(collectCompactableUserMessages(messages).map(messageText)).toEqual(['u1', 'u2']);
+  });
+});
+
+describe('selectRecentUserMessages', () => {
+  it('keeps the most recent messages within the budget', () => {
+    const messages = [
+      textMessage('user', 'old'),
+      textMessage('user', 'mid'),
+      textMessage('user', 'recent'),
+    ];
+    const budget = estimateTokensForMessage(messages[1]!) + estimateTokensForMessage(messages[2]!);
+
+    expect(selectRecentUserMessages(messages, budget).map(messageText)).toEqual(['mid', 'recent']);
+  });
+
+  it('truncates the oldest kept message when it would overflow the budget', () => {
+    const long = 'x'.repeat(1_000);
+    const messages = [textMessage('user', long), textMessage('user', 'recent')];
+    const budget = estimateTokensForMessage(messages[1]!) + 10;
+
+    const selected = selectRecentUserMessages(messages, budget);
+
+    expect(selected).toHaveLength(2);
+    expect(estimateTokens(messageText(selected[0]!))).toBeLessThanOrEqual(10);
+    expect(messageText(selected[1]!)).toBe('recent');
+  });
+
+  it('returns nothing when the budget is zero', () => {
+    expect(selectRecentUserMessages([textMessage('user', 'hi')], 0)).toEqual([]);
+  });
+});
+
+describe('buildCompactionSummaryText', () => {
+  it('prefixes the summary', () => {
+    expect(buildCompactionSummaryText('Summary.')).toBe(`${COMPACTION_SUMMARY_PREFIX}\nSummary.`);
+  });
+
+  it('falls back when the summary is empty', () => {
+    expect(buildCompactionSummaryText('   ')).toBe(`${COMPACTION_SUMMARY_PREFIX}\n(no summary available)`);
+  });
+});
diff --git a/packages/agent-core/test/agent/compaction/micro.test.ts b/packages/agent-core/test/agent/compaction/micro.test.ts
index 91be825d1..8847d3684 100644
--- a/packages/agent-core/test/agent/compaction/micro.test.ts
+++ b/packages/agent-core/test/agent/compaction/micro.test.ts
@@ -697,10 +697,10 @@ describe('MicroCompaction', () => {
     await ctx.rpc.beginCompaction({});
     await compacted;
 
-    expect(ctx.agent.context.messages).toHaveLength(1);
-    expect(ctx.agent.context.messages[0]).toMatchObject({
-      role: 'assistant',
-      content: [{ type: 'text', text: 'Summary.' }],
+    expect(ctx.agent.context.messages).toHaveLength(2);
+    expect(ctx.agent.context.messages[1]).toMatchObject({
+      role: 'user',
+      content: [{ type: 'text', text: expect.stringContaining('Summary.') }],
     });
   });
 
diff --git a/packages/agent-core/test/agent/compaction/strategy.test.ts b/packages/agent-core/test/agent/compaction/strategy.test.ts
index ebc4c7cdd..d5eaa5ca9 100644
--- a/packages/agent-core/test/agent/compaction/strategy.test.ts
+++ b/packages/agent-core/test/agent/compaction/strategy.test.ts
@@ -1,188 +1,86 @@
-
-import {
-  type Message
-} from '@moonshot-ai/kosong';
 import { describe, expect, it } from 'vitest';
 
-import { DefaultCompactionStrategy } from '../../../src/agent/compaction';
-import { estimateTokensForMessages } from '../../../src/utils/tokens';
+import {
+  DEFAULT_COMPACTION_CONFIG,
+  DefaultCompactionStrategy,
+} from '../../../src/agent/compaction';
 
 describe('DefaultCompactionStrategy', () => {
-  it('keeps an oversized trailing user message as recent', () => {
-    const strategy = testCompactionStrategy();
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', `pending user ${'x'.repeat(1_200)}`),
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('keeps consecutive trailing user messages as recent', () => {
-    const strategy = testCompactionStrategy();
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', `pending user one ${'x'.repeat(1_200)}`),
-      textMessage('user', `pending user two ${'x'.repeat(1_200)}`),
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('compacts the prefix when the trailing exchange itself is oversized', () => {
-    const strategy = testCompactionStrategy();
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', 'recent user'),
-      textMessage('assistant', `recent assistant ${'x'.repeat(1_200)}`),
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('returns 0 when there is nothing to compact', () => {
-    const strategy = testCompactionStrategy();
-    expect(strategy.computeCompactCount([], 'auto')).toBe(0);
-    expect(strategy.computeCompactCount([textMessage('user', 'only pending')], 'auto')).toBe(0);
-    expect(
-      strategy.computeCompactCount(
-        [
-          textMessage('user', 'a'),
-          textMessage('user', 'b'),
-          textMessage('user', 'c'),
-        ],
-        'auto',
-      ),
-    ).toBe(0);
-  });
-
-  it('returns 0 when no intermediate split exists and the last message is also unsplittable', () => {
-    const strategy = testCompactionStrategy();
-    const messages: Message[] = [
-      textMessage('user', 'inspect'),
-      {
-        role: 'assistant',
-        content: [],
-        toolCalls: [{ type: 'function', id: 'call_a', name: 'Lookup', arguments: '{}' }],
-      },
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(0);
-  });
-
-  it('does not split inside a parallel tool exchange', () => {
-    const strategy = testCompactionStrategy();
-    const messages: Message[] = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', 'run both tools'),
-      {
-        role: 'assistant',
-        content: [],
-        toolCalls: [
-          { type: 'function', id: 'call_a', name: 'Lookup', arguments: '{}' },
-          { type: 'function', id: 'call_b', name: 'Lookup', arguments: '{}' },
-        ],
-      },
-      { role: 'tool', content: [{ type: 'text', text: 'a' }], toolCalls: [], toolCallId: 'call_a' },
-      { role: 'tool', content: [{ type: 'text', text: 'b' }], toolCalls: [], toolCallId: 'call_b' },
-      textMessage('user', 'next prompt'),
-    ];
-
-    // The only valid split is before the parallel exchange (after 'old assistant'),
-    // never between tool_a and tool_b — that would leave tool_b as an orphan.
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('shrinks auto compaction input to fit the model window', () => {
-    const maxSize = 1_000;
-    const strategy = testCompactionStrategy(maxSize);
-    const messages = Array.from({ length: 30 }, (_, i) =>
-      textMessage('assistant', `message ${i} ${'x'.repeat(400)}`),
-    );
-
-    const count = strategy.computeCompactCount(messages, 'auto');
+  it('triggers auto-compaction at 90% of the context window', () => {
+    const strategy = new DefaultCompactionStrategy(() => 100_000, {
+      ...DEFAULT_COMPACTION_CONFIG,
+      reservedContextSize: 0,
+    });
 
-    expect(count).toBeGreaterThan(0);
-    expect(count).toBeLessThan(messages.length);
-    expect(estimateTokensForMessages(messages.slice(0, count))).toBeLessThanOrEqual(maxSize);
-    expect(estimateTokensForMessages(messages.slice(0, count + 1))).toBeGreaterThan(maxSize);
+    expect(strategy.shouldCompact(89_999)).toBe(false);
+    expect(strategy.shouldCompact(90_000)).toBe(true);
+    expect(strategy.shouldCompact(100_000)).toBe(true);
   });
 
-  it('shrinks manual compaction input to fit the model window', () => {
-    const maxSize = 1_000;
-    const strategy = testCompactionStrategy(maxSize);
-    const messages = Array.from({ length: 30 }, (_, i) =>
-      textMessage('assistant', `message ${i} ${'x'.repeat(400)}`),
-    );
-
-    const count = strategy.computeCompactCount(messages, 'manual');
+  it('blocks at the same threshold by default (synchronous compaction)', () => {
+    const strategy = new DefaultCompactionStrategy(() => 100_000, {
+      ...DEFAULT_COMPACTION_CONFIG,
+      reservedContextSize: 0,
+    });
 
-    expect(count).toBeGreaterThan(0);
-    expect(count).toBeLessThan(messages.length);
-    expect(estimateTokensForMessages(messages.slice(0, count))).toBeLessThanOrEqual(maxSize);
-    expect(estimateTokensForMessages(messages.slice(0, count + 1))).toBeGreaterThan(maxSize);
+    expect(strategy.shouldBlock(89_999)).toBe(false);
+    expect(strategy.shouldBlock(90_000)).toBe(true);
+    expect(strategy.checkAfterStep).toBe(false);
   });
 
-  it('reserves response context by default before the ratio threshold is reached', () => {
+  it('reserves response context before the ratio threshold is reached', () => {
     const strategy = new DefaultCompactionStrategy(() => 256_000);
 
+    // 256k * 0.9 = 230_400, and the 50k reserve triggers at 206k.
     expect(strategy.shouldCompact(210_000)).toBe(true);
     expect(strategy.shouldBlock(210_000)).toBe(true);
   });
 
   it('ignores reserved context when the reserve is not smaller than the model window', () => {
     const strategy = new DefaultCompactionStrategy(() => 32_000, {
-      triggerRatio: 0.85,
-      blockRatio: 0.85,
+      triggerRatio: 0.9,
+      blockRatio: 0.9,
       reservedContextSize: 50_000,
       maxCompactionPerTurn: 3,
-      maxRecentMessages: 3,
-      maxRecentUserMessages: Infinity,
-      maxRecentSizeRatio: 0.2,
-      minOverflowReductionRatio: 0.05,
     });
 
     expect(strategy.shouldCompact(1)).toBe(false);
     expect(strategy.shouldBlock(1)).toBe(false);
-    expect(strategy.shouldCompact(28_000)).toBe(true);
-    expect(strategy.shouldBlock(28_000)).toBe(true);
+    // Falls back to the 90% ratio: 32_000 * 0.9 = 28_800.
+    expect(strategy.shouldCompact(28_800)).toBe(true);
+    expect(strategy.shouldBlock(28_800)).toBe(true);
   });
-});
 
-function testCompactionStrategy(maxSize: number = 1_000): DefaultCompactionStrategy {
-  return new DefaultCompactionStrategy(() => maxSize, {
-    triggerRatio: 0.85,
-    blockRatio: 0.85,
-    reservedContextSize: 0,
-    maxCompactionPerTurn: 3,
-    maxRecentMessages: 10,
-    maxRecentUserMessages: Infinity,
-    maxRecentSizeRatio: 0.2,
-    minOverflowReductionRatio: 0.05,
+  it('does not compact when the context window is unknown', () => {
+    const strategy = new DefaultCompactionStrategy(() => 0);
+
+    expect(strategy.shouldCompact(1_000_000)).toBe(false);
+    expect(strategy.shouldBlock(1_000_000)).toBe(false);
   });
-}
 
-function overflowOnlyCompactionStrategy(maxSize: number = 14): DefaultCompactionStrategy {
+  it('enables after-step checks only when ratios differ (async compaction)', () => {
+    const strategy = new DefaultCompactionStrategy(() => 100_000, {
+      triggerRatio: 0.8,
+      blockRatio: 0.9,
+      reservedContextSize: 0,
+      maxCompactionPerTurn: 3,
+    });
+
+    expect(strategy.checkAfterStep).toBe(true);
+  });
+
+  it('exposes maxCompactionPerTurn', () => {
+    const strategy = testCompactionStrategy();
+
+    expect(strategy.maxCompactionPerTurn).toBe(3);
+  });
+});
+
+function testCompactionStrategy(maxSize: number = 1_000): DefaultCompactionStrategy {
   return new DefaultCompactionStrategy(() => maxSize, {
-    triggerRatio: Infinity,
-    blockRatio: Infinity,
+    triggerRatio: 0.9,
+    blockRatio: 0.9,
     reservedContextSize: 0,
     maxCompactionPerTurn: 3,
-    maxRecentMessages: 3,
-    maxRecentUserMessages: Infinity,
-    maxRecentSizeRatio: 0.2,
-    minOverflowReductionRatio: 0.05,
   });
 }
-
-function textMessage(role: 'user' | 'assistant', text: string): Message {
-  return {
-    role,
-    content: [{ type: 'text', text }],
-    toolCalls: [],
-  };
-}
diff --git a/packages/agent-core/test/agent/context.test.ts b/packages/agent-core/test/agent/context.test.ts
index 7efa1f066..dc7238dcc 100644
--- a/packages/agent-core/test/agent/context.test.ts
+++ b/packages/agent-core/test/agent/context.test.ts
@@ -449,7 +449,7 @@ describe('Agent context', () => {
     await ctx.expectResumeMatches();
   });
 
-  it('preserves deferred reminders when compaction keeps a pending tool exchange', async () => {
+  it('drops deferred reminders when compaction drops a pending tool exchange', async () => {
     const ctx = testAgent();
     ctx.configure();
 
@@ -462,7 +462,7 @@ describe('Agent context', () => {
     });
     ctx.agent.context.applyCompaction({
       summary: 'summary of old prompt',
-      compactedCount: 1,
+      compactedCount: 4,
       tokensBefore: 100,
       tokensAfter: 40,
     });
@@ -471,11 +471,16 @@ describe('Agent context', () => {
       variant: 'host',
     });
 
+    // Compaction keeps only the real user prompt plus the summary; the deferred
+    // first reminder is dropped because initial context is rebuilt every turn.
+    // The second reminder, appended after compaction, is preserved.
     expect(ctx.agent.context.messages.map((message) => message.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
+      'user',
+      'user',
+    ]);
+    expect(ctx.agent.context.messages[2]?.content).toEqual([
+      { type: 'text', text: '<system-reminder>\nsecond reminder\n</system-reminder>' },
     ]);
 
     ctx.dispatch({
@@ -488,21 +493,13 @@ describe('Agent context', () => {
       },
     });
 
+    // The pending tool exchange was dropped by compaction, so the late tool
+    // result is ignored and the history is unchanged.
     expect(ctx.agent.context.messages.map((message) => message.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
-      'tool',
       'user',
       'user',
     ]);
-    expect(ctx.agent.context.messages[5]?.content).toEqual([
-      { type: 'text', text: '<system-reminder>\nfirst reminder\n</system-reminder>' },
-    ]);
-    expect(ctx.agent.context.messages[6]?.content).toEqual([
-      { type: 'text', text: '<system-reminder>\nsecond reminder\n</system-reminder>' },
-    ]);
     await ctx.expectResumeMatches();
   });
 
@@ -536,7 +533,7 @@ describe('Agent context', () => {
       tokensBefore: 100,
       tokensAfter: 20,
     });
-    expect(ctx.agent.context.history[0]?.origin).toEqual({ kind: 'compaction_summary' });
+    expect(ctx.agent.context.history.at(-1)?.origin).toEqual({ kind: 'compaction_summary' });
 
     ctx.mockNextResponse({ type: 'text', text: 'after compaction' });
     await ctx.rpc.prompt({ input: [{ type: 'text', text: 'new prompt' }] });
@@ -546,8 +543,9 @@ describe('Agent context', () => {
       system: <system-prompt>
       tools: []
       messages:
-        assistant: text "summary of old context"
-        user: text "recent user message\\n\\nnew prompt"
+        user: text "old user message\\n\\nrecent user message"
+        user: text "summary of old context"
+        user: text "new prompt"
     `);
     await ctx.expectResumeMatches();
   });
@@ -716,7 +714,11 @@ describe('Agent context', () => {
 
     expect(ctx.agent.context.history).toEqual([
       expect.objectContaining({
-        role: 'assistant',
+        role: 'user',
+        content: [{ type: 'text', text: 'old user message' }],
+      }),
+      expect.objectContaining({
+        role: 'user',
         origin: { kind: 'compaction_summary' },
         content: [{ type: 'text', text: 'summary of compacted context' }],
       }),
@@ -752,7 +754,11 @@ describe('Agent context', () => {
     }).not.toThrow();
     expect(ctx.agent.context.history).toEqual([
       expect.objectContaining({
-        role: 'assistant',
+        role: 'user',
+        content: [{ type: 'text', text: 'old user message' }],
+      }),
+      expect.objectContaining({
+        role: 'user',
         origin: { kind: 'compaction_summary' },
         content: [{ type: 'text', text: 'summary of compacted context' }],
       }),
diff --git a/packages/agent-core/test/agent/resume.test.ts b/packages/agent-core/test/agent/resume.test.ts
index 301e2533a..087f7012b 100644
--- a/packages/agent-core/test/agent/resume.test.ts
+++ b/packages/agent-core/test/agent/resume.test.ts
@@ -79,7 +79,8 @@ describe('Agent resume', () => {
         system: <system-prompt>
         tools: Bash
         messages:
-          assistant: text "Historical compacted summary."
+          user: text "Historical prompt"
+          user: text "Historical compacted summary."
           user: text "Fresh prompt after resume"
           user: text <plan-mode-reminder>
     `);
@@ -355,7 +356,11 @@ describe('Agent resume', () => {
 
     expect(ctx.agent.context.history).toEqual([
       expect.objectContaining({
-        role: 'assistant',
+        role: 'user',
+        content: [{ type: 'text', text: 'Historical prompt before compaction' }],
+      }),
+      expect.objectContaining({
+        role: 'user',
         content: [{ type: 'text', text: 'Compacted implementation notes.' }],
         origin: { kind: 'compaction_summary' },
       }),
diff --git a/packages/agent-core/test/prompt-placeholders.test.ts b/packages/agent-core/test/prompt-placeholders.test.ts
index 4068979b5..2809d46db 100644
--- a/packages/agent-core/test/prompt-placeholders.test.ts
+++ b/packages/agent-core/test/prompt-placeholders.test.ts
@@ -23,7 +23,6 @@ const SRC = join(import.meta.dirname, '..', 'src');
 // templated prompt file is introduced.
 const TEMPLATED = new Set([
   'profile/default/system.md',
-  'agent/compaction/compaction-instruction.md',
   'tools/builtin/file/read.md',
   'tools/builtin/file/read-media.md',
   'tools/builtin/shell/bash.md',
diff --git a/packages/agent-core/test/services/message-transcript.test.ts b/packages/agent-core/test/services/message-transcript.test.ts
index 4ec462f2d..b93cee676 100644
--- a/packages/agent-core/test/services/message-transcript.test.ts
+++ b/packages/agent-core/test/services/message-transcript.test.ts
@@ -92,27 +92,27 @@ describe('reduceWireRecords', () => {
     expect(foldedLength).toBe(2);
   });
 
-  it('compaction keeps the prefix and inserts the summary at the fold point', () => {
+  it('compaction keeps the prefix and appends the user-role summary', () => {
     const { entries, foldedLength } = reduceWireRecords([
       appendMessage(userMessage('u1')),
       ...assistantStep('s1', 'a1'),
       appendMessage(userMessage('u2')),
       ...assistantStep('s2', 'a2'),
-      // folded history is [u1, a1, u2, a2]; compact the first 3, keep a2.
-      compaction('SUM', 3),
+      compaction('SUM', 4),
       appendMessage(userMessage('u3')),
     ]);
     expect(entries.map((e) => textOf(e.message))).toEqual([
       'u1',
       'a1',
       'u2',
-      'SUM',
       'a2',
+      'SUM',
       'u3',
     ]);
-    expect(entries[3]!.message.origin).toEqual({ kind: 'compaction_summary' });
-    // live folded view would be [SUM, a2, u3]
-    expect(foldedLength).toBe(3);
+    expect(entries[4]!.message.origin).toEqual({ kind: 'compaction_summary' });
+    expect(entries[4]!.message.role).toBe('user');
+    // live folded view would be [u1, u2, SUM, u3]
+    expect(foldedLength).toBe(4);
   });
 
   it('handles repeated compactions', () => {
@@ -120,11 +120,11 @@ describe('reduceWireRecords', () => {
       appendMessage(userMessage('u1')),
       compaction('S1', 1),
       appendMessage(userMessage('u2')),
-      // folded = [S1, u2]; compact both.
-      compaction('S2', 2),
+      compaction('S2', 3),
     ]);
     expect(entries.map((e) => textOf(e.message))).toEqual(['u1', 'S1', 'u2', 'S2']);
-    expect(foldedLength).toBe(1);
+    // live folded view would be [u1, u2, S2]
+    expect(foldedLength).toBe(3);
   });
 
   it('undo removes through the last real user prompt and skips injections', () => {
@@ -433,19 +433,16 @@ describe('MessageService over a compacted wire log', () => {
       records.map((r) => JSON.stringify(r)).join('\n') + '\n',
       'utf8',
     );
-    // What getContext would return after the fold.
+    // What getContext would return after the fold: kept user messages + summary.
     liveHistory = [
+      userMessage('u1'),
+      userMessage('u2'),
       {
-        role: 'assistant',
+        role: 'user',
         content: [{ type: 'text', text: 'SUM' }],
         toolCalls: [],
         origin: { kind: 'compaction_summary' },
       } as ContextMessage,
-      {
-        role: 'assistant',
-        content: [{ type: 'text', text: 'a2' }],
-        toolCalls: [],
-      } as ContextMessage,
     ];
     const rpc: Partial<CoreRPC> = {
       listSessions: vi.fn().mockImplementation(async () => [summary()]),
@@ -473,8 +470,8 @@ describe('MessageService over a compacted wire log', () => {
     const asc = [...page.items].reverse();
     expect(
       asc.map((m) => (m.content[0] as { text?: string }).text ?? '[non-text]'),
-    ).toEqual(['u1', 'a1', 'u2', 'SUM', 'a2']);
-    expect(asc[3]!.metadata).toEqual({ origin: { kind: 'compaction_summary' } });
+    ).toEqual(['u1', 'a1', 'u2', 'a2', 'SUM']);
+    expect(asc[4]!.metadata).toEqual({ origin: { kind: 'compaction_summary' } });
   });
 
   it('uses wire record times for created_at, strictly increasing', async () => {
@@ -495,7 +492,7 @@ describe('MessageService over a compacted wire log', () => {
     const asc = [...page.items].reverse();
     expect(
       asc.map((m) => (m.content[0] as { text?: string }).text ?? '[non-text]'),
-    ).toEqual(['u1', 'a1', 'u2', 'SUM', 'a2', 'u3-live']);
+    ).toEqual(['u1', 'a1', 'u2', 'a2', 'SUM', 'u3-live']);
   });
 
   it('get() resolves ids against the same full transcript', async () => {
@@ -511,8 +508,9 @@ describe('MessageService over a compacted wire log', () => {
     const page = await impl.list(SESSION_ID, { page_size: 100 });
     const asc = [...page.items].reverse();
     expect(asc.map((m) => (m.content[0] as { text?: string }).text)).toEqual([
+      'u1',
+      'u2',
       'SUM',
-      'a2',
     ]);
   });
 
@@ -530,6 +528,6 @@ describe('MessageService over a compacted wire log', () => {
     const asc = [...page.items].reverse();
     expect(
       asc.map((m) => (m.content[0] as { text?: string }).text ?? '[non-text]'),
-    ).toEqual(['u1', 'a1', 'u2', 'SUM', 'a2', 'u3']);
+    ).toEqual(['u1', 'a1', 'u2', 'a2', 'SUM', 'u3']);
   });
 });
diff --git a/packages/server/test/sessions.e2e.test.ts b/packages/server/test/sessions.e2e.test.ts
index 090edf346..c6b902e6b 100644
--- a/packages/server/test/sessions.e2e.test.ts
+++ b/packages/server/test/sessions.e2e.test.ts
@@ -566,7 +566,7 @@ describe('POST /api/v1/sessions/{session_id}:compact — begin compaction', () =
     const env = envelopeOf<unknown>(res.json());
     expect(env.code).toBe(ErrorCode.COMPACTION_UNABLE);
     expect(env.data).toBeNull();
-    expect(env.msg).toMatch(/No prefix/);
+    expect(env.msg).toMatch(/No messages to compact/);
   });
 });
 
diff --git a/packages/server/test/snapshotService.unit.test.ts b/packages/server/test/snapshotService.unit.test.ts
index 1c328e990..be50822eb 100644
--- a/packages/server/test/snapshotService.unit.test.ts
+++ b/packages/server/test/snapshotService.unit.test.ts
@@ -235,11 +235,11 @@ describe('SnapshotService.read', () => {
     ]);
 
     const snap = await f.service.read(sid);
-    // Reduce keeps the prefix and inserts the summary at the fold; final
-    // entry list is older-1, older-2, <summary>, after-compaction.
+    // Reduce keeps the prefix and appends a user-role summary; final entry
+    // list is older-1, older-2, <summary>, after-compaction.
     expect(snap.messages.items).toHaveLength(4);
     const summaryMsg = snap.messages.items[2]!;
-    expect(summaryMsg.role).toBe('assistant');
+    expect(summaryMsg.role).toBe('user');
     expect((summaryMsg.content[0] as { text: string }).text).toBe('compacted prefix');
     expect(snap.messages.items[3]!.role).toBe('user');
   });

From 32c70b3bc16cc82b178a21ae2c02322fb9b6ec89 Mon Sep 17 00:00:00 2001
From: 7Sageer <7sageer@djwcb.cn>
Date: Mon, 29 Jun 2026 15:46:38 +0800
Subject: [PATCH 02/16] fix(agent-core): compaction follow-ups

- Revert auto-compaction trigger/block ratio to 0.85

- Rewrite truncateTextToTokens as a single-pass O(n) scan so CJK inputs do not freeze compaction

- Drop the pending tool exchange and deferred messages on compaction in the wire transcript reducer, mirroring ContextMemory.applyCompaction

- Append the todo list to the compaction summary again

- Restore the no-tools guard in the compaction prompt
---
 .../compaction/compaction-instruction.md      |  2 +
 .../agent-core/src/agent/compaction/full.ts   | 17 +++++-
 .../src/agent/compaction/memento.ts           | 21 ++++++--
 .../src/agent/compaction/strategy.ts          |  4 +-
 .../src/services/message/transcript.ts        |  7 ++-
 .../test/agent/compaction/full.test.ts        | 52 +++++++++----------
 .../test/agent/compaction/memento.test.ts     | 13 +++++
 .../test/agent/compaction/strategy.test.ts    | 16 +++---
 .../test/services/message-transcript.test.ts  | 33 ++++++++++++
 9 files changed, 121 insertions(+), 44 deletions(-)

diff --git a/packages/agent-core/src/agent/compaction/compaction-instruction.md b/packages/agent-core/src/agent/compaction/compaction-instruction.md
index 42fae605d..3f4bcacd3 100644
--- a/packages/agent-core/src/agent/compaction/compaction-instruction.md
+++ b/packages/agent-core/src/agent/compaction/compaction-instruction.md
@@ -7,3 +7,5 @@ Include:
 - Any critical data, examples, or references needed to continue
 
 Be concise, structured, and focused on helping the next LLM seamlessly continue the work.
+
+Respond with text only. Do not call any tools — you already have everything you need in the conversation history.
diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts
index 12eb17c2b..953e42866 100644
--- a/packages/agent-core/src/agent/compaction/full.ts
+++ b/packages/agent-core/src/agent/compaction/full.ts
@@ -21,6 +21,11 @@ import {
   retryBackoffDelays,
   sleepForRetry,
 } from '../../loop/retry';
+import {
+  renderTodoList,
+  TODO_STORE_KEY,
+  type TodoItem,
+} from '../../tools/builtin/state/todo-list';
 import {
   estimateTokens,
   estimateTokensForMessages,
@@ -305,6 +310,16 @@ export class FullCompaction {
     return `${base}\n\n${customInstruction}`;
   }
 
+  private postProcessSummary(summary: string): string {
+    const storeData = this.agent.tools.storeData();
+    const todos = (storeData[TODO_STORE_KEY] as readonly TodoItem[] | undefined) ?? [];
+    if (todos.length === 0) {
+      return summary;
+    }
+    const todoMarkdown = renderTodoList(todos, '## TODO List');
+    return `${summary.trim()}\n\n${todoMarkdown}`;
+  }
+
   private async compactionRound(
     signal: AbortSignal,
     data: Readonly<CompactionBeginData>,
@@ -409,7 +424,7 @@ export class FullCompaction {
         }
       }
 
-      const summaryText = buildCompactionSummaryText(summary ?? '');
+      const summaryText = buildCompactionSummaryText(this.postProcessSummary(summary ?? ''));
       const keptUserMessages = selectRecentUserMessages(
         collectCompactableUserMessages(originalHistory),
         COMPACT_USER_MESSAGE_MAX_TOKENS,
diff --git a/packages/agent-core/src/agent/compaction/memento.ts b/packages/agent-core/src/agent/compaction/memento.ts
index 2af6a0abf..705278ea3 100644
--- a/packages/agent-core/src/agent/compaction/memento.ts
+++ b/packages/agent-core/src/agent/compaction/memento.ts
@@ -1,4 +1,4 @@
-import { estimateTokens, estimateTokensForMessage } from '../../utils/tokens';
+import { estimateTokensForMessage } from '../../utils/tokens';
 import summaryPrefixTemplate from './compaction-summary-prefix.md?raw';
 
 /**
@@ -64,10 +64,21 @@ export function collectCompactableUserMessages<T extends MessageLike>(messages:
 
 function truncateTextToTokens(text: string, maxTokens: number): string {
   if (maxTokens <= 0) return '';
-  if (estimateTokens(text) <= maxTokens) return text;
-  let end = Math.min(text.length, maxTokens * 4);
-  while (end > 0 && estimateTokens(text.slice(0, end)) > maxTokens) {
-    end--;
+  // Single pass: walk the string once, mirroring estimateTokens' heuristic
+  // (ASCII ~4 chars/token, non-ASCII ~1 char/token) and stop at the first
+  // code point that would push the running total over the budget. This keeps
+  // CJK-heavy inputs from the O(n^2) cost of re-estimating shrinking prefixes.
+  let asciiCount = 0;
+  let nonAsciiCount = 0;
+  let end = 0;
+  for (const char of text) {
+    if (char.codePointAt(0)! <= 127) {
+      asciiCount++;
+    } else {
+      nonAsciiCount++;
+    }
+    if (Math.ceil(asciiCount / 4) + nonAsciiCount > maxTokens) break;
+    end += char.length;
   }
   return text.slice(0, end);
 }
diff --git a/packages/agent-core/src/agent/compaction/strategy.ts b/packages/agent-core/src/agent/compaction/strategy.ts
index faff72c0f..9fb8e8b41 100644
--- a/packages/agent-core/src/agent/compaction/strategy.ts
+++ b/packages/agent-core/src/agent/compaction/strategy.ts
@@ -17,8 +17,8 @@ export interface CompactionConfig {
  * compaction.
  */
 export const DEFAULT_COMPACTION_CONFIG: CompactionConfig = {
-  triggerRatio: 0.9,
-  blockRatio: 0.9,
+  triggerRatio: 0.85,
+  blockRatio: 0.85,
   reservedContextSize: 50_000,
   maxCompactionPerTurn: Infinity,
 };
diff --git a/packages/agent-core/src/services/message/transcript.ts b/packages/agent-core/src/services/message/transcript.ts
index 771dc16cc..7961be9b2 100644
--- a/packages/agent-core/src/services/message/transcript.ts
+++ b/packages/agent-core/src/services/message/transcript.ts
@@ -261,8 +261,11 @@ export function reduceWireRecords(records: Iterable<AgentRecord>): {
           time: record.time,
         });
         foldedLength = keptUserMessages.length + 1;
-        openSteps.clear();
-        flushDeferredIfToolExchangeClosed();
+        // Drop any open tool exchange and deferred messages exactly like
+        // ContextMemory.applyCompaction: late tool results become orphans and
+        // deferred injections are not rebuilt, so pending ids must not strand
+        // later appends in `deferred`.
+        resetOpenState();
         break;
       }
       case 'context.undo':
diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts
index 5bce74217..70b5dfcf0 100644
--- a/packages/agent-core/test/agent/compaction/full.test.ts
+++ b/packages/agent-core/test/agent/compaction/full.test.ts
@@ -76,10 +76,10 @@ describe('FullCompaction', () => {
       [wire] context.append_message     { "message": { "role": "user", "content": [ { "type": "text", "text": "recent user three" } ], "toolCalls": [], "origin": { "kind": "user" } }, "time": "<time>" }
       [wire] full_compaction.begin      { "source": "manual", "instruction": "Keep the important test facts.", "time": "<time>" }
       [emit] compaction.started         { "trigger": "manual", "instruction": "Keep the important test facts." }
-      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 155, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 120, "maxContextTokens": 256000, "contextUsage": 0.00046875, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 155, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 155, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 184, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 120, "maxContextTokens": 256000, "contextUsage": 0.00046875, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 184, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 184, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] context.apply_compaction   { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 119, "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 119, "maxContextTokens": 256000, "contextUsage": 0.00046484375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 155, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 155, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 119, "maxContextTokens": 256000, "contextUsage": 0.00046484375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 184, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 184, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete   { "time": "<time>" }
       [emit] compaction.completed       { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 119 } }
     `);
@@ -93,7 +93,7 @@ describe('FullCompaction', () => {
         assistant: text "old assistant two"
         user: text "recent user three"
         assistant: text "recent assistant three"
-        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nKeep the important test facts."
+        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history.\\n\\nKeep the important test facts."
     `);
     expect(ctx.compactHistory()).toMatchInlineSnapshot(`
       [
@@ -127,7 +127,7 @@ describe('FullCompaction', () => {
         compactedCount: 6,
         retryCount: 0,
         thinkingLevel: 'off',
-        inputOther: 155,
+        inputOther: 184,
         output: 8,
         inputCacheRead: 0,
         inputCacheCreation: 0,
@@ -828,7 +828,7 @@ describe('FullCompaction', () => {
         user: text "run both tools"
         assistant: []  calls call_open_one:LookupOne { "query": "one" }, call_open_two:LookupTwo { "query": "two" }
         tool[call_open_one]: text "one result"
-        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nKeep stable facts."
+        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history.\\n\\nKeep stable facts."
     `);
     // The unresolved tool exchange is sent to the model (see the compaction input
     // above) but is dropped from the replacement history, leaving only the real
@@ -878,10 +878,10 @@ describe('FullCompaction', () => {
       [wire] full_compaction.begin      { "source": "manual", "time": "<time>" }
       [emit] compaction.started         { "trigger": "manual" }
       [wire] context.append_message     { "message": { "role": "user", "content": [ { "type": "text", "text": "new user while compacting" } ], "toolCalls": [], "origin": { "kind": "user" } }, "time": "<time>" }
-      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 133, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 80, "maxContextTokens": 256000, "contextUsage": 0.0003125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 133, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 133, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 162, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 80, "maxContextTokens": 256000, "contextUsage": 0.0003125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 162, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 162, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] context.apply_compaction   { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 114, "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 114, "maxContextTokens": 256000, "contextUsage": 0.0004453125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 133, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 133, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 114, "maxContextTokens": 256000, "contextUsage": 0.0004453125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 162, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 162, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete   { "time": "<time>" }
       [emit] compaction.completed       { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 114 } }
     `);
@@ -893,7 +893,7 @@ describe('FullCompaction', () => {
         assistant: text "old assistant one"
         user: text "recent user two"
         assistant: text "recent assistant two"
-        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work."
+        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history."
     `);
     expect(ctx.compactHistory()).toMatchInlineSnapshot(`
       [
@@ -942,8 +942,8 @@ describe('FullCompaction', () => {
       [emit] compaction.started       { "trigger": "manual" }
       [wire] context.clear            { "time": "<time>" }
       [emit] agent.status.updated     { "model": "kimi-code", "contextTokens": 0, "maxContextTokens": 256000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual" }
-      [wire] usage.record             { "model": "kimi-code", "usage": { "inputOther": 133, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated     { "model": "kimi-code", "contextTokens": 0, "maxContextTokens": 256000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 133, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 133, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record             { "model": "kimi-code", "usage": { "inputOther": 162, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated     { "model": "kimi-code", "contextTokens": 0, "maxContextTokens": 256000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 162, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 162, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.cancel   { "time": "<time>" }
       [emit] compaction.cancelled     {}
     `);
@@ -955,7 +955,7 @@ describe('FullCompaction', () => {
         assistant: text "old assistant one"
         user: text "recent user two"
         assistant: text "recent assistant two"
-        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work."
+        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history."
     `);
     expect(ctx.compactHistory()).toMatchInlineSnapshot(`[]`);
     await ctx.expectResumeMatches();
@@ -986,10 +986,10 @@ describe('FullCompaction', () => {
       [wire] full_compaction.begin       { "source": "auto", "time": "<time>" }
       [emit] compaction.started          { "trigger": "auto" }
       [emit] compaction.blocked          { "turnId": 0 }
-      [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 154, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 950000, "maxContextTokens": 256000, "contextUsage": 3.7109375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 154, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 154, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 183, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 950000, "maxContextTokens": 256000, "contextUsage": 3.7109375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 183, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 183, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] context.apply_compaction    { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nAuto compacted summary.", "compactedCount": 7, "tokensBefore": 46, "tokensAfter": 127, "time": "<time>" }
-      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 127, "maxContextTokens": 256000, "contextUsage": 0.00049609375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 154, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 154, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 127, "maxContextTokens": 256000, "contextUsage": 0.00049609375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 183, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 183, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete    { "time": "<time>" }
       [emit] compaction.completed        { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nAuto compacted summary.", "compactedCount": 7, "tokensBefore": 46, "tokensAfter": 127 } }
       [wire] context.append_loop_event   { "event": { "type": "step.begin", "uuid": "<uuid-1>", "turnId": "0", "step": 1 }, "time": "<time>" }
@@ -999,7 +999,7 @@ describe('FullCompaction', () => {
       [wire] context.append_loop_event   { "event": { "type": "step.end", "uuid": "<uuid-1>", "turnId": "0", "step": 1, "usage": { "inputOther": 126, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "end_turn" }, "time": "<time>" }
       [emit] turn.step.completed         { "turnId": 0, "step": 1, "stepId": "<uuid-1>", "usage": { "inputOther": 126, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "end_turn" }
       [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 126, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "turn", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 137, "maxContextTokens": 256000, "contextUsage": 0.00053515625, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 280, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 280, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 126, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 137, "maxContextTokens": 256000, "contextUsage": 0.00053515625, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 309, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 309, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 126, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [emit] turn.ended                  { "turnId": 0, "reason": "completed" }
     `);
     expect(ctx.llmInputs()).toMatchInlineSnapshot(`
@@ -1014,7 +1014,7 @@ describe('FullCompaction', () => {
           user: text "recent user three"
           assistant: text "recent assistant three"
           user: text "Answer after compacting"
-          user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work."
+          user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history."
 
       call 2:
         messages:
@@ -1294,9 +1294,9 @@ describe('FullCompaction', () => {
         max_context_tokens: 1_000_000,
       },
     });
-    // The auto-compact ratio moved from 0.85 to 0.9, so the context must sit
-    // above 90% of the 1_000_000 window (plus pending tokens) to trigger.
-    ctx.appendExchange(1, 'old user one', 'old assistant one', 890_000);
+    // The auto-compact ratio is 0.85, so the context alone (840k) sits below
+    // the 850k threshold and the pending prompt pushes it over.
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 840_000);
     const pendingPrompt = `ratio-pending-verbatim:${'x'.repeat(60_000)}`;
 
     ctx.mockNextResponse({ type: 'text', text: 'Ratio compacted summary.' });
@@ -1814,10 +1814,10 @@ describe('FullCompaction', () => {
       [wire] full_compaction.begin       { "source": "auto", "time": "<time>" }
       [emit] compaction.started          { "trigger": "auto" }
       [emit] compaction.blocked          { "turnId": 0 }
-      [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 116, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 0, "maxContextTokens": 1000000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 116, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 116, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 145, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 0, "maxContextTokens": 1000000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 145, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 145, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] context.apply_compaction    { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nFirst compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 114, "time": "<time>" }
-      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 114, "maxContextTokens": 1000000, "contextUsage": 0.000114, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 116, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 116, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 114, "maxContextTokens": 1000000, "contextUsage": 0.000114, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 145, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 145, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete    { "time": "<time>" }
       [emit] compaction.completed        { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nFirst compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 114 } }
       [wire] context.append_loop_event   { "event": { "type": "step.begin", "uuid": "<uuid-1>", "turnId": "0", "step": 1 }, "time": "<time>" }
@@ -1832,7 +1832,7 @@ describe('FullCompaction', () => {
       [wire] context.append_loop_event   { "event": { "type": "step.end", "uuid": "<uuid-1>", "turnId": "0", "step": 1, "usage": { "inputOther": 115, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "tool_use" }, "time": "<time>" }
       [emit] turn.step.completed         { "turnId": 0, "step": 1, "stepId": "<uuid-1>", "usage": { "inputOther": 115, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "tool_use" }
       [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 115, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "turn", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 126, "maxContextTokens": 1000000, "contextUsage": 0.000126, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 231, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 231, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 115, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 126, "maxContextTokens": 1000000, "contextUsage": 0.000126, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 260, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 260, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 115, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [emit] turn.step.interrupted       { "turnId": 0, "step": 2, "reason": "error", "message": "Compaction limit exceeded (1)" }
       [emit] turn.ended                  { "turnId": 0, "reason": "failed", "error": { "code": "context.overflow", "message": "Compaction limit exceeded (1)", "name": "KimiError", "details": { "maxCompactions": 1, "turnId": 0 }, "retryable": true } }
     `);
@@ -1845,7 +1845,7 @@ describe('FullCompaction', () => {
         tools: []
         messages:
           user: text "Trigger repeated compaction"
-          user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work."
+          user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history."
 
       call 2:
         messages:
diff --git a/packages/agent-core/test/agent/compaction/memento.test.ts b/packages/agent-core/test/agent/compaction/memento.test.ts
index 15e6cb780..523fcab2d 100644
--- a/packages/agent-core/test/agent/compaction/memento.test.ts
+++ b/packages/agent-core/test/agent/compaction/memento.test.ts
@@ -83,6 +83,19 @@ describe('selectRecentUserMessages', () => {
     expect(messageText(selected[1]!)).toBe('recent');
   });
 
+  it('truncates a CJK-heavy oldest message within the budget in one pass', () => {
+    const cjk = '中'.repeat(40_000);
+    const messages = [textMessage('user', cjk), textMessage('user', 'recent')];
+    const budget = estimateTokensForMessage(messages[1]!) + 1_000;
+
+    const selected = selectRecentUserMessages(messages, budget);
+
+    expect(selected).toHaveLength(2);
+    expect(messageText(selected[1]!)).toBe('recent');
+    expect(estimateTokens(messageText(selected[0]!))).toBeLessThanOrEqual(1_000);
+    expect(cjk.startsWith(messageText(selected[0]!))).toBe(true);
+  });
+
   it('returns nothing when the budget is zero', () => {
     expect(selectRecentUserMessages([textMessage('user', 'hi')], 0)).toEqual([]);
   });
diff --git a/packages/agent-core/test/agent/compaction/strategy.test.ts b/packages/agent-core/test/agent/compaction/strategy.test.ts
index d5eaa5ca9..e87aa4554 100644
--- a/packages/agent-core/test/agent/compaction/strategy.test.ts
+++ b/packages/agent-core/test/agent/compaction/strategy.test.ts
@@ -6,14 +6,14 @@ import {
 } from '../../../src/agent/compaction';
 
 describe('DefaultCompactionStrategy', () => {
-  it('triggers auto-compaction at 90% of the context window', () => {
+  it('triggers auto-compaction at 85% of the context window', () => {
     const strategy = new DefaultCompactionStrategy(() => 100_000, {
       ...DEFAULT_COMPACTION_CONFIG,
       reservedContextSize: 0,
     });
 
-    expect(strategy.shouldCompact(89_999)).toBe(false);
-    expect(strategy.shouldCompact(90_000)).toBe(true);
+    expect(strategy.shouldCompact(84_999)).toBe(false);
+    expect(strategy.shouldCompact(85_000)).toBe(true);
     expect(strategy.shouldCompact(100_000)).toBe(true);
   });
 
@@ -23,15 +23,15 @@ describe('DefaultCompactionStrategy', () => {
       reservedContextSize: 0,
     });
 
-    expect(strategy.shouldBlock(89_999)).toBe(false);
-    expect(strategy.shouldBlock(90_000)).toBe(true);
+    expect(strategy.shouldBlock(84_999)).toBe(false);
+    expect(strategy.shouldBlock(85_000)).toBe(true);
     expect(strategy.checkAfterStep).toBe(false);
   });
 
   it('reserves response context before the ratio threshold is reached', () => {
     const strategy = new DefaultCompactionStrategy(() => 256_000);
 
-    // 256k * 0.9 = 230_400, and the 50k reserve triggers at 206k.
+    // 256k * 0.85 = 217_600, and the 50k reserve triggers at 206k.
     expect(strategy.shouldCompact(210_000)).toBe(true);
     expect(strategy.shouldBlock(210_000)).toBe(true);
   });
@@ -78,8 +78,8 @@ describe('DefaultCompactionStrategy', () => {
 
 function testCompactionStrategy(maxSize: number = 1_000): DefaultCompactionStrategy {
   return new DefaultCompactionStrategy(() => maxSize, {
-    triggerRatio: 0.9,
-    blockRatio: 0.9,
+    triggerRatio: 0.85,
+    blockRatio: 0.85,
     reservedContextSize: 0,
     maxCompactionPerTurn: 3,
   });
diff --git a/packages/agent-core/test/services/message-transcript.test.ts b/packages/agent-core/test/services/message-transcript.test.ts
index b93cee676..c17f50b51 100644
--- a/packages/agent-core/test/services/message-transcript.test.ts
+++ b/packages/agent-core/test/services/message-transcript.test.ts
@@ -127,6 +127,39 @@ describe('reduceWireRecords', () => {
     expect(foldedLength).toBe(3);
   });
 
+  it('drops a late tool result after compaction closes an open exchange', () => {
+    const { entries, foldedLength } = reduceWireRecords([
+      appendMessage(userMessage('u1')),
+      loopEvent({ type: 'step.begin', uuid: 's1', turnId: 't', step: 0 }),
+      loopEvent({
+        type: 'tool.call',
+        uuid: 'c1',
+        turnId: 't',
+        step: 0,
+        stepUuid: 's1',
+        toolCallId: 'call_1',
+        name: 'Bash',
+        arguments: '{"command":"ls"}',
+      }),
+      compaction('SUM', 3),
+      loopEvent({
+        type: 'tool.result',
+        parentUuid: 'c1',
+        toolCallId: 'call_1',
+        result: { output: 'late result' },
+      }),
+      appendMessage(userMessage('u2')),
+    ]);
+
+    // Compaction closes the open exchange, so the late tool result is an
+    // orphan and dropped — matching ContextMemory — and the following user
+    // message is appended normally instead of being stranded in `deferred`.
+    expect(entries.map((e) => e.message.role)).toEqual(['user', 'assistant', 'user', 'user']);
+    expect(entries.map((e) => textOf(e.message))).toEqual(['u1', '', 'SUM', 'u2']);
+    // live folded view would be [u1, SUM, u2]
+    expect(foldedLength).toBe(3);
+  });
+
   it('undo removes through the last real user prompt and skips injections', () => {
     const { entries, foldedLength } = reduceWireRecords([
       appendMessage(userMessage('u1')),

From 53b92ec75ddd2454a505db9cd7d549ad95b41750 Mon Sep 17 00:00:00 2001
From: 7Sageer <7sageer@djwcb.cn>
Date: Mon, 29 Jun 2026 16:16:18 +0800
Subject: [PATCH 03/16] feat(agent-core): refresh system prompt after
 compaction

Re-render the cached system prompt with fresh runtime context (cwd listing, AGENTS.md, additional-dirs info, skill list) once compaction finishes, so post-compaction turns do not keep the bootstrap snapshot.

Cache the active profile on the Agent and expose refreshSystemPrompt(); FullCompaction invokes it after applyCompaction. This intentionally invalidates the prompt-cache prefix.
---
 .../agent-core/src/agent/compaction/full.ts   |  5 ++++
 packages/agent-core/src/agent/index.ts        | 30 +++++++++++++++++--
 packages/agent-core/src/session/index.ts      |  2 +-
 .../agent-core/src/session/subagent-host.ts   |  2 +-
 .../test/agent/compaction/full.test.ts        | 18 +++++++++++
 .../test/agent/compaction/memento.test.ts     | 15 ++++++++++
 6 files changed, 68 insertions(+), 4 deletions(-)

diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts
index 953e42866..245e75ae9 100644
--- a/packages/agent-core/src/agent/compaction/full.ts
+++ b/packages/agent-core/src/agent/compaction/full.ts
@@ -284,6 +284,11 @@ export class FullCompaction {
       const result = await this.compactionRound(signal, data);
       if (!result) return;
       this.markCompleted();
+      try {
+        await this.agent.refreshSystemPrompt();
+      } catch (error) {
+        this.agent.log.error('failed to refresh system prompt after compaction', { error });
+      }
       this.agent.emitEvent({ type: 'compaction.completed', result });
       await this.agent.injection.injectGoal();
       this.triggerPostCompactHook(data, result);
diff --git a/packages/agent-core/src/agent/index.ts b/packages/agent-core/src/agent/index.ts
index 4e733a80c..96aa460ec 100644
--- a/packages/agent-core/src/agent/index.ts
+++ b/packages/agent-core/src/agent/index.ts
@@ -11,7 +11,11 @@ import type { EnabledPluginSessionStart } from '#/plugin';
 
 import type { McpConnectionManager } from '../mcp';
 import { FlagResolver, type ExperimentalFlagResolver } from '../flags';
-import type { PreparedSystemPromptContext, ResolvedAgentProfile } from '../profile';
+import {
+  prepareSystemPromptContext,
+  type PreparedSystemPromptContext,
+  type ResolvedAgentProfile,
+} from '../profile';
 import type { ModelProvider } from '../session/provider-manager';
 import type { SessionSubagentHost } from '../session/subagent-host';
 import { noopTelemetryClient, type TelemetryClient } from '../telemetry';
@@ -127,6 +131,8 @@ export class Agent {
   readonly replayBuilder: ReplayBuilder;
 
   private additionalDirs: readonly string[];
+  private activeProfile?: ResolvedAgentProfile;
+  private brandHome?: string;
 
   constructor(options: AgentOptions) {
     this.type = options.type ?? 'main';
@@ -248,7 +254,13 @@ export class Agent {
     });
   }
 
-  useProfile(profile: ResolvedAgentProfile, context?: PreparedSystemPromptContext): void {
+  useProfile(
+    profile: ResolvedAgentProfile,
+    context?: PreparedSystemPromptContext,
+    brandHome?: string,
+  ): void {
+    this.activeProfile = profile;
+    this.brandHome = brandHome;
     const systemPrompt = profile.systemPrompt({
       osEnv: this.kaos.osEnv,
       cwd: this.config.cwd,
@@ -261,6 +273,20 @@ export class Agent {
     this.tools.setActiveTools(profile.tools);
   }
 
+  /**
+   * Re-render the system prompt with freshly gathered runtime context (cwd
+   * listing, AGENTS.md, additional-dirs info, skill list). Called after
+   * compaction so the post-compaction turns do not keep a snapshot captured
+   * at session bootstrap. Invalidates the prompt-cache prefix by design.
+   */
+  async refreshSystemPrompt(): Promise<void> {
+    if (this.activeProfile === undefined) return;
+    const context = await prepareSystemPromptContext(this.kaos, this.brandHome, {
+      additionalDirs: this.additionalDirs,
+    });
+    this.useProfile(this.activeProfile, context, this.brandHome);
+  }
+
   async resume(options?: AgentRecordsReplayOptions): Promise<{ warning?: string }> {
     const result = await this.records.replay(options);
     try {
diff --git a/packages/agent-core/src/session/index.ts b/packages/agent-core/src/session/index.ts
index a2bb022b5..948c28a49 100644
--- a/packages/agent-core/src/session/index.ts
+++ b/packages/agent-core/src/session/index.ts
@@ -470,7 +470,7 @@ export class Session {
       this.options.kimiHomeDir,
       { additionalDirs: this.additionalDirs },
     );
-    agent.useProfile(profile, context);
+    agent.useProfile(profile, context, this.options.kimiHomeDir);
     const { agentsMdWarning } = context;
     if (agentsMdWarning !== undefined) {
       this.agentsMdWarning = agentsMdWarning;
diff --git a/packages/agent-core/src/session/subagent-host.ts b/packages/agent-core/src/session/subagent-host.ts
index 5153acea5..01c3063f3 100644
--- a/packages/agent-core/src/session/subagent-host.ts
+++ b/packages/agent-core/src/session/subagent-host.ts
@@ -374,7 +374,7 @@ export class SessionSubagentHost {
       this.session.options.kimiHomeDir,
       { additionalDirs: child.getAdditionalDirs() },
     );
-    child.useProfile(profile, context);
+    child.useProfile(profile, context, this.session.options.kimiHomeDir);
     child.tools.inheritUserTools(parent.tools);
   }
 
diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts
index 70b5dfcf0..15e751c23 100644
--- a/packages/agent-core/test/agent/compaction/full.test.ts
+++ b/packages/agent-core/test/agent/compaction/full.test.ts
@@ -136,6 +136,24 @@ describe('FullCompaction', () => {
     await ctx.expectResumeMatches();
   });
 
+  it('refreshes the system prompt after compaction completes', async () => {
+    const ctx = testAgent();
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
+    ctx.appendExchange(2, 'recent user two', 'recent assistant two', 40);
+
+    const refreshSpy = vi.spyOn(ctx.agent, 'refreshSystemPrompt');
+
+    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' });
+    await ctx.rpc.beginCompaction({});
+    await ctx.once('compaction.completed');
+
+    expect(refreshSpy).toHaveBeenCalledTimes(1);
+  });
+
   it('projects the compacted prefix before sending the summary request', async () => {
     const ctx = testAgent({ compactionStrategy: alwaysCompactOnce });
     ctx.configure({
diff --git a/packages/agent-core/test/agent/compaction/memento.test.ts b/packages/agent-core/test/agent/compaction/memento.test.ts
index 523fcab2d..7591e3207 100644
--- a/packages/agent-core/test/agent/compaction/memento.test.ts
+++ b/packages/agent-core/test/agent/compaction/memento.test.ts
@@ -96,6 +96,21 @@ describe('selectRecentUserMessages', () => {
     expect(cjk.startsWith(messageText(selected[0]!))).toBe(true);
   });
 
+  it('does not split surrogate pairs while truncating emoji text', () => {
+    const emoji = '😀'.repeat(2_000);
+    const messages = [textMessage('user', emoji), textMessage('user', 'recent')];
+    const budget = estimateTokensForMessage(messages[1]!) + 333;
+
+    const selected = selectRecentUserMessages(messages, budget);
+    const truncated = messageText(selected[0]!);
+
+    expect(selected).toHaveLength(2);
+    expect(messageText(selected[1]!)).toBe('recent');
+    expect(estimateTokens(truncated)).toBeLessThanOrEqual(333);
+    expect([...truncated].every((char) => char === '😀')).toBe(true);
+    expect(truncated.length % 2).toBe(0);
+  });
+
   it('returns nothing when the budget is zero', () => {
     expect(selectRecentUserMessages([textMessage('user', 'hi')], 0)).toEqual([]);
   });

From 638c0f7e19853e6aff6b1a0e84d0cddffbad2377 Mon Sep 17 00:00:00 2001
From: 7Sageer <7sageer@djwcb.cn>
Date: Mon, 29 Jun 2026 16:28:22 +0800
Subject: [PATCH 04/16] fix(agent-core): re-inject plan reminder after
 compaction

---
 .../agent-core/src/agent/compaction/full.ts   |  2 +-
 .../agent-core/src/agent/injection/manager.ts |  5 ++
 .../src/agent/injection/plan-mode.ts          |  4 ++
 .../test/agent/compaction/full.test.ts        | 53 +++++++++++++++++++
 4 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts
index 245e75ae9..1fa183268 100644
--- a/packages/agent-core/src/agent/compaction/full.ts
+++ b/packages/agent-core/src/agent/compaction/full.ts
@@ -290,7 +290,7 @@ export class FullCompaction {
         this.agent.log.error('failed to refresh system prompt after compaction', { error });
       }
       this.agent.emitEvent({ type: 'compaction.completed', result });
-      await this.agent.injection.injectGoal();
+      await this.agent.injection.injectAfterCompaction();
       this.triggerPostCompactHook(data, result);
     } catch (error) {
       if (isAbortError(error)) return;
diff --git a/packages/agent-core/src/agent/injection/manager.ts b/packages/agent-core/src/agent/injection/manager.ts
index 99c9cd07e..009c3d682 100644
--- a/packages/agent-core/src/agent/injection/manager.ts
+++ b/packages/agent-core/src/agent/injection/manager.ts
@@ -40,6 +40,11 @@ export class InjectionManager {
     await this.activeGoalInjector()?.inject();
   }
 
+  async injectAfterCompaction(): Promise<void> {
+    await this.injectGoal();
+    await this.inject();
+  }
+
   onContextClear(): void {
     for (const injector of this.lifecycleInjectors()) {
       injector.onContextClear();
diff --git a/packages/agent-core/src/agent/injection/plan-mode.ts b/packages/agent-core/src/agent/injection/plan-mode.ts
index bbc0d557e..209351db4 100644
--- a/packages/agent-core/src/agent/injection/plan-mode.ts
+++ b/packages/agent-core/src/agent/injection/plan-mode.ts
@@ -22,6 +22,10 @@ export class PlanModeInjector extends DynamicInjector {
     this.wasActive = this.agent.planMode.isActive;
   }
 
+  override onContextCompacted(): void {
+    this.injectedAt = null;
+  }
+
   override async getInjection(): Promise<string | undefined> {
     const { isActive, planFilePath } = this.agent.planMode;
     if (!isActive) {
diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts
index 15e751c23..034143112 100644
--- a/packages/agent-core/test/agent/compaction/full.test.ts
+++ b/packages/agent-core/test/agent/compaction/full.test.ts
@@ -1208,6 +1208,59 @@ describe('FullCompaction', () => {
     await ctx.expectResumeMatches();
   });
 
+  it('reinjects the plan-mode reminder after manual compaction', async () => {
+    const ctx = testAgent();
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    await ctx.agent.planMode.enter('compact-plan', false);
+    const planFilePath = ctx.agent.planMode.planFilePath;
+    if (planFilePath === null) throw new Error('plan file path missing');
+    ctx.agent.context.appendUserMessage([{ type: 'text', text: 'draft the plan' }]);
+    await ctx.agent.injection.inject();
+    expect(ctx.compactHistory().at(-1)?.text).toContain(`Plan file: ${planFilePath}`);
+    const completed = ctx.once('compaction.completed');
+
+    ctx.mockNextResponse({ type: 'text', text: 'Plan-mode compacted summary.' });
+    await ctx.rpc.beginCompaction({});
+    await completed;
+
+    await vi.waitFor(() => {
+      const planReminders = ctx.agent.context.history.filter(
+        (message) => message.origin?.kind === 'injection' && message.origin.variant === 'plan_mode',
+      );
+      expect(planReminders).toHaveLength(1);
+      expect(messageText(planReminders[0])).toContain(`Plan file: ${planFilePath}`);
+    });
+    expect(ctx.compactHistory().at(-1)?.text).toContain(`Plan file: ${planFilePath}`);
+    await ctx.expectResumeMatches();
+  });
+
+  it('includes the plan-mode reminder in the answer request after auto compaction', async () => {
+    const ctx = testAgent();
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    await ctx.agent.planMode.enter('auto-compact-plan', false);
+    const planFilePath = ctx.agent.planMode.planFilePath;
+    if (planFilePath === null) throw new Error('plan file path missing');
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 100);
+    ctx.appendExchange(2, 'recent user two', 'recent assistant two', 950_000);
+    await ctx.agent.injection.inject();
+
+    ctx.mockNextResponse({ type: 'text', text: 'Auto plan compacted summary.' });
+    ctx.mockNextResponse({ type: 'text', text: 'I can answer with the plan path.' });
+    await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Continue the plan' }] });
+    await ctx.untilTurnEnd();
+
+    expect(ctx.llmCalls).toHaveLength(2);
+    const answerTexts = ctx.llmCalls[1]?.history.map(messageText) ?? [];
+    expect(answerTexts.some((text) => text.includes(`Plan file: ${planFilePath}`))).toBe(true);
+    await ctx.expectResumeMatches();
+  });
+
   it('does not auto compact small contexts when reserved size exceeds the model window', async () => {
     const ctx = testAgent({
       initialConfig: {

From 82ae5ee8cd090d9877c6dd9a646a5eab16851751 Mon Sep 17 00:00:00 2001
From: 7Sageer <7sageer@djwcb.cn>
Date: Mon, 29 Jun 2026 16:34:46 +0800
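Subject: [PATCH 05/16] fix(agent-core): bound overflow compaction retries

Cap consecutive provider-overflow recoveries (overflow -> compact ->
overflow again) at MAX_OVERFLOW_COMPACTION_ATTEMPTS (3) and throw a
CONTEXT_OVERFLOW error once the cap is exceeded. The counter is reset at
the start of each turn and after every completed step.

When shrinking an overflowing compaction request, drop any tool results
left at the front together with the oldest message, so the reduced
history never starts with an orphan tool result.
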
---
 .../agent-core/src/agent/compaction/full.ts   | 42 +++++++++++-
 .../test/agent/compaction/full.test.ts        | 68 +++++++++++++++++++
 2 files changed, 109 insertions(+), 1 deletion(-)

diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts
index 1fa183268..1408f79f5 100644
--- a/packages/agent-core/src/agent/compaction/full.ts
+++ b/packages/agent-core/src/agent/compaction/full.ts
@@ -51,6 +51,14 @@ import {
 
 export const MAX_COMPACTION_RETRY_ATTEMPTS = 5;
 
+// Consecutive provider-overflow recoveries (overflow -> compact -> overflow
+// again) allowed in a single turn before we give up. Each successful step
+// resets the counter, so this only trips when compaction stops reducing the
+// request below the model window — i.e. the compacted floor itself no longer
+// fits. Without this cap the turn loop can compact forever on a small or
+// observed-to-be-small context window.
+const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
+
 const DEFAULT_COMPACTION_MAX_COMPLETION_TOKENS = 128 * 1024;
 const OVERFLOW_CONTEXT_SAFETY_RATIO = 0.85;
 const OVERFLOW_STATUS_RECOVERY_RATIO = 0.5;
@@ -77,6 +85,11 @@ export class FullCompaction {
   // checkAutoCompaction skips in that case even if an observed overflow
   // limit still flags the context as oversized.
   private lastCompactedTokenCount: number | null = null;
+  // Counts provider-overflow recoveries in this turn that have not yet been
+  // followed by a successful step. Trips MAX_OVERFLOW_COMPACTION_ATTEMPTS to
+  // stop an overflow -> compact -> overflow loop when compaction can no
+  // longer shrink the request below the model window.
+  private consecutiveOverflowCompactions = 0;
   protected readonly strategy: CompactionStrategy;
 
   constructor(
@@ -209,9 +222,18 @@ export class FullCompaction {
   resetForTurn(): void {
     this.compactionCountInTurn = 0;
     this.lastCompactedTokenCount = null;
+    this.consecutiveOverflowCompactions = 0;
   }
 
   async handleOverflowError(signal: AbortSignal, error: unknown) {
+    this.consecutiveOverflowCompactions += 1;
+    if (this.consecutiveOverflowCompactions > MAX_OVERFLOW_COMPACTION_ATTEMPTS) {
+      throw new KimiError(
+        ErrorCodes.CONTEXT_OVERFLOW,
+        `Compaction failed to bring the context under the model window after ${String(MAX_OVERFLOW_COMPACTION_ATTEMPTS)} attempts.`,
+        { cause: error instanceof Error ? error : undefined },
+      );
+    }
     const didStartCompaction = this.beginAutoCompaction();
     if (!didStartCompaction && !this.compacting) throw error;
     // Always block on overflow errors
@@ -226,6 +248,10 @@ export class FullCompaction {
   }
 
   async afterStep(): Promise<void> {
+    // A completed step means a generate() succeeded, so any prior
+    // overflow -> compact cycle produced a request that now fits; clear the
+    // loop guard.
+    this.consecutiveOverflowCompactions = 0;
     if (this.strategy.checkAfterStep) {
       this.checkAutoCompaction(false);
     }
@@ -401,7 +427,10 @@ export class FullCompaction {
             error instanceof CompactionTruncatedError ||
             error instanceof APIEmptyResponseError;
           if (isOverflow && historyForModel.length > 1) {
-            historyForModel = historyForModel.slice(1);
+            // A bare `slice(1)` can strand a tool result at the front of the
+            // history, which the provider rejects as a malformed request. Trim
+            // any leading tool results along with the oldest message.
+            historyForModel = dropOldestMessageAndLeadingToolResults(historyForModel);
             retryCount = 0;
             continue;
           }
@@ -504,6 +533,17 @@ export class FullCompaction {
   }
 }
 
+function dropOldestMessageAndLeadingToolResults<T extends { readonly role: string }>(
+  messages: readonly T[],
+): T[] {
+  if (messages.length <= 1) return messages.slice();
+  let start = 1;
+  while (start < messages.length && messages[start]!.role === 'tool') {
+    start += 1;
+  }
+  return messages.slice(start);
+}
+
 function extractCompactionSummary(response: GenerateResult): string {
   const summary =
     typeof response.message.content === 'string'
diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts
index 034143112..ad5d7c817 100644
--- a/packages/agent-core/test/agent/compaction/full.test.ts
+++ b/packages/agent-core/test/agent/compaction/full.test.ts
@@ -1476,6 +1476,74 @@ describe('FullCompaction', () => {
     await ctx.expectResumeMatches();
   });
 
+  it('stops repeated provider-overflow compactions when the compacted context still overflows', async () => {
+    let callCount = 0;
+    const generate: GenerateFn = async (_provider, _system, _tools, history) => {
+      callCount += 1;
+      if (messageText(history.at(-1)).includes('CONTEXT CHECKPOINT COMPACTION')) {
+        return textResult(`Still too large summary ${String(callCount)}.`);
+      }
+      throw new APIContextOverflowError(400, 'Context length exceeded', `req-overflow-${String(callCount)}`);
+    };
+    const ctx = testAgent({ generate });
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
+
+    await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Retry until overflow guard' }] });
+    const events = await ctx.untilTurnEnd();
+
+    expect(countEvents(events, 'compaction.started')).toBe(3);
+    expect(callCount).toBe(7);
+    expect(events).toContainEqual(
+      expect.objectContaining({
+        event: 'turn.ended',
+        args: expect.objectContaining({
+          reason: 'failed',
+          error: expect.objectContaining({
+            code: 'context.overflow',
+            message: 'Compaction failed to bring the context under the model window after 3 attempts.',
+          }),
+        }),
+      }),
+    );
+  });
+
+  it('does not leave an orphan tool result at the start when reducing overflowing compaction input', async () => {
+    const inputs: string[][] = [];
+    let callCount = 0;
+    const generate: GenerateFn = async (_provider, _system, _tools, history) => {
+      callCount += 1;
+      inputs.push(inputHistorySnapshot(history));
+      if (callCount <= 2) {
+        throw new APIContextOverflowError(400, 'Context length exceeded', `req-compact-overflow-${String(callCount)}`);
+      }
+      return textResult('Reduced tool history summary.');
+    };
+    const ctx = testAgent({ generate });
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.appendToolExchange();
+    const compacted = ctx.once('context.apply_compaction');
+
+    await ctx.rpc.beginCompaction({});
+    await compacted;
+
+    expect(inputs).toHaveLength(3);
+    expect(inputs[1]?.map((entry) => entry.split(':', 1)[0])).toEqual([
+      'assistant',
+      'tool',
+      'user',
+    ]);
+    expect(inputs[2]?.map((entry) => entry.split(':', 1)[0])).toEqual(['user']);
+    expect(inputs[2]?.[0]).toBe('user: <compaction-instruction>');
+    await ctx.expectResumeMatches();
+  });
+
   it('recovers from plain 413 when estimated request is over effective max', async () => {
     let callCount = 0;
     const generate: GenerateFn = async (_provider, _system, _tools, _history, callbacks) => {

From 5a3ddf579f9757fc272ee37ebbf2995041f040ea Mon Sep 17 00:00:00 2001
From: 7Sageer <7sageer@djwcb.cn>
Date: Mon, 29 Jun 2026 17:03:01 +0800
Subject: [PATCH 06/16] fix(agent-core): reinject auto permission reminder
 after compaction

---
 .../src/agent/injection/permission-mode.ts    |  9 ++++-
 .../agent-core/test/agent/permission.test.ts  | 40 +++++++++++++++++++
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/packages/agent-core/src/agent/injection/permission-mode.ts b/packages/agent-core/src/agent/injection/permission-mode.ts
index 638ed6760..9ba024b15 100644
--- a/packages/agent-core/src/agent/injection/permission-mode.ts
+++ b/packages/agent-core/src/agent/injection/permission-mode.ts
@@ -15,13 +15,20 @@ const AUTO_MODE_EXIT_REMINDER = [
 export class PermissionModeInjector extends DynamicInjector {
   protected override readonly injectionVariant = 'permission_mode';
   private lastMode: PermissionMode | undefined;
+  private refreshAfterCompaction = false;
+
+  override onContextCompacted(_compactedCount: number): void {
+    this.injectedAt = null;
+    this.refreshAfterCompaction = true;
+  }
 
   getInjection(): string | undefined {
     const mode = this.agent.permission.mode;
     const previousMode = this.lastMode;
 
-    if (mode === previousMode) return undefined;
+    if (!this.refreshAfterCompaction && mode === previousMode) return undefined;
 
+    this.refreshAfterCompaction = false;
     this.lastMode = mode;
     if (mode === 'auto') return AUTO_MODE_ENTER_REMINDER;
     if (previousMode === 'auto') return AUTO_MODE_EXIT_REMINDER;
diff --git a/packages/agent-core/test/agent/permission.test.ts b/packages/agent-core/test/agent/permission.test.ts
index 8d0b27712..89c7d92ad 100644
--- a/packages/agent-core/test/agent/permission.test.ts
+++ b/packages/agent-core/test/agent/permission.test.ts
@@ -276,6 +276,46 @@ describe('Permission auto mode', () => {
     );
   });
 
+  it('reinjects the auto mode reminder after context compaction', async () => {
+    const appendSystemReminder = vi.fn();
+    const agent = {
+      permission: { mode: 'auto' },
+      context: { history: [], appendSystemReminder },
+    } as unknown as Agent;
+    const injector = new PermissionModeInjector(agent);
+
+    await injector.inject();
+    appendSystemReminder.mockClear();
+    injector.onContextCompacted(1);
+    await injector.inject();
+
+    expect(appendSystemReminder).toHaveBeenCalledWith(
+      expect.stringContaining('Do NOT call AskUserQuestion while auto mode is active'),
+      { kind: 'injection', variant: 'permission_mode' },
+    );
+  });
+
+  it('keeps the auto mode exit reminder after compaction if the mode changes', async () => {
+    const appendSystemReminder = vi.fn();
+    const permission = { mode: 'auto' as PermissionMode };
+    const agent = {
+      permission,
+      context: { history: [], appendSystemReminder },
+    } as unknown as Agent;
+    const injector = new PermissionModeInjector(agent);
+
+    await injector.inject();
+    appendSystemReminder.mockClear();
+    injector.onContextCompacted(1);
+    permission.mode = 'manual';
+    await injector.inject();
+
+    expect(appendSystemReminder).toHaveBeenCalledWith(
+      expect.stringContaining('Auto permission mode is no longer active'),
+      { kind: 'injection', variant: 'permission_mode' },
+    );
+  });
+
   it('blocks AskUserQuestion in auto mode before execution', async () => {
     const { manager, requestApproval } = makePermissionManager(async () => ({
       decision: 'approved',

From ececfb164303edddc2735cfe762243969dd5f760 Mon Sep 17 00:00:00 2001
From: 7Sageer <7sageer@djwcb.cn>
Date: Mon, 29 Jun 2026 17:30:13 +0800
Subject: [PATCH 07/16] fix(agent-core): compaction rework follow-ups

- Record keptUserMessageCount on the wire so transcript replay reproduces
  the live folded length after truncation.
- Flush steered messages after compaction so notifications land in the
  post-compaction context instead of being dropped.
- Unify real-user-input detection across context, transcript, and vis.
- Reset injector state correctly after compaction.
- Make the overflow compaction retry cap configurable.
- Sync the vis context projector to the kept-users-plus-summary shape.
---
 .changeset/compaction-internals-cleanup.md    |  5 ++
 apps/vis/server/src/lib/context-projector.ts  | 83 ++++++++++---------
 .../sample-compaction/agents/main/wire.jsonl  |  3 +-
 .../server/test/lib/context-projector.test.ts | 76 ++++++++++-------
 apps/vis/server/test/routes/context.test.ts   | 17 ++--
 .../agent-core/src/agent/compaction/full.ts   | 32 ++-----
 .../src/agent/compaction/strategy.ts          | 14 +++-
 .../agent-core/src/agent/compaction/types.ts  | 12 +++
 .../agent-core/src/agent/context/index.ts     | 33 ++++++--
 .../src/agent/injection/injector.ts           |  7 +-
 .../agent-core/src/agent/injection/manager.ts |  4 +-
 .../src/agent/injection/permission-mode.ts    |  2 +-
 .../src/agent/injection/plan-mode.ts          |  4 -
 packages/agent-core/src/agent/turn/index.ts   |  7 +-
 packages/agent-core/src/index.ts              |  6 ++
 .../src/services/message/transcript.ts        | 35 ++++----
 .../test/agent/compaction/full.test.ts        |  2 +
 .../test/agent/compaction/memento.test.ts     |  2 +-
 .../test/agent/compaction/strategy.test.ts    |  3 +
 .../agent-core/test/agent/context.test.ts     |  4 -
 .../test/agent/injection/manager.test.ts      | 14 ++--
 .../agent-core/test/agent/permission.test.ts  |  4 +-
 .../test/services/message-transcript.test.ts  | 24 ++++++
 packages/protocol/src/events.ts               | 10 +++
 24 files changed, 245 insertions(+), 158 deletions(-)
 create mode 100644 .changeset/compaction-internals-cleanup.md

diff --git a/.changeset/compaction-internals-cleanup.md b/.changeset/compaction-internals-cleanup.md
new file mode 100644
index 000000000..7558ffd7b
--- /dev/null
+++ b/.changeset/compaction-internals-cleanup.md
@@ -0,0 +1,5 @@
+---
+"@moonshot-ai/kimi-code": patch
+---
+
+Tighten compaction bookkeeping so compacted history stays consistent across retries.
diff --git a/apps/vis/server/src/lib/context-projector.ts b/apps/vis/server/src/lib/context-projector.ts
index fd7a376e6..9e3722455 100644
--- a/apps/vis/server/src/lib/context-projector.ts
+++ b/apps/vis/server/src/lib/context-projector.ts
@@ -1,3 +1,9 @@
+import {
+  COMPACT_USER_MESSAGE_MAX_TOKENS,
+  collectCompactableUserMessages,
+  isRealUserInput,
+  selectRecentUserMessages,
+} from '@moonshot-ai/agent-core';
 import type {
   ContentPart,
   ContextMessage,
@@ -234,19 +240,21 @@ export function projectContext(
         break;
       case 'context.apply_compaction': {
         openSteps = new Map();
-        // Mirror agent-core's actual `applyCompaction` behaviour
-        // (`packages/agent-core/src/agent/context/index.ts`): history becomes
-        // `[summaryBubble, ...history.slice(compactedCount)]`. The summary is
-        // an *assistant* message tagged `origin.kind = 'compaction_summary'`
-        // (using 'system' would skew role counts and any downstream diff
-        // against agent-core history). The post-compaction tail is preserved
-        // rather than dropped, so messages still in context stay visible.
+        // Mirror agent-core's `applyCompaction`
+        // (`packages/agent-core/src/agent/context/index.ts`): the live history
+        // becomes the most recent real user messages (verbatim, within a token
+        // budget) followed by a single user-role summary tagged
+        // `origin.kind = 'compaction_summary'`. Assistant messages, tool calls,
+        // and tool results are dropped. The selection helpers
+        // (`selectRecentUserMessages` / `collectCompactableUserMessages`) are the
+        // same ones agent-core's `ContextMemory` and the web transcript
+        // reducer apply, so all three views stay in sync.
         const summaryBubble: ProjectedMessage = {
           lineNo: entry.lineNo,
           time: rec.time,
           source: 'compaction_summary',
           message: {
-            role: 'assistant',
+            role: 'user',
             content: [{ type: 'text', text: rec.summary }],
             toolCalls: [],
             origin: { kind: 'compaction_summary' },
@@ -259,33 +267,38 @@ export function projectContext(
           },
         };
         if (mode === 'model') {
-          // Drop the first `rec.compactedCount` HISTORY entries (NOT array
-          // entries): agent-core's `compactedCount` indexes into `_history`,
-          // which never contains our synthetic 'undo'/'clear' markers. Walk the
-          // array counting only history entries (`isHistoryEntry`) until
-          // `compactedCount` are passed, then slice there — any UI-only markers
-          // in the dropped region go with it (correct: they precede the
-          // compaction). With no markers this is exactly `slice(compactedCount)`.
-          let sliceAt = messages.length;
-          let passed = 0;
-          for (let i = 0; i < messages.length; i++) {
-            if (passed >= rec.compactedCount) {
-              sliceAt = i;
-              break;
-            }
-            if (isHistoryEntry(messages[i]!)) passed++;
-          }
-          if (passed < rec.compactedCount) sliceAt = messages.length;
-          messages = [summaryBubble, ...messages.slice(sliceAt)];
+          // Rebuild the model's-eye view as the kept user messages + summary.
+          // `realUserEntries` is filtered with the exact
+          // `collectCompactableUserMessages` predicate so it stays aligned with
+          // the selection below (genuine user input only — no injections,
+          // system triggers, or prior summaries). `selectRecentUserMessages`
+          // keeps a contiguous suffix of that subsequence, with only the oldest
+          // kept message possibly truncated, so each kept message maps back onto
+          // its original ProjectedMessage wrapper (preserving line/time); we swap
+          // in the (possibly truncated) message object.
+          const historyEntries = messages.filter(isHistoryEntry);
+          const realUserEntries = historyEntries.filter(
+            (pm) => collectCompactableUserMessages([pm.message]).length === 1,
+          );
+          const keptUserMessages = selectRecentUserMessages(
+            realUserEntries.map((pm) => pm.message),
+            COMPACT_USER_MESSAGE_MAX_TOKENS,
+          );
+          const suffixStart = realUserEntries.length - keptUserMessages.length;
+          const keptEntries: ProjectedMessage[] = keptUserMessages.map((message, i) => {
+            const original = realUserEntries[suffixStart + i]!;
+            return original.message === message ? original : { ...original, message };
+          });
+          messages = [...keptEntries, summaryBubble];
         } else {
           // Full history: keep ALL preceding messages, just append the summary
           // marker inline so the compacted prefix stays visible.
           messages.push(summaryBubble);
         }
         // Mirror agent-core applyCompaction() → microCompaction.reset() (cutoff
-        // → 0): the message list is rebuilt as [summary, ...tail], so the old
-        // index-based cutoff no longer points at the same messages. (In full
-        // mode the blanking pass does not run, so this is a no-op there.)
+        // → 0): the message list is rebuilt, so the old index-based cutoff no
+        // longer points at the same messages. (In full mode the blanking pass
+        // does not run, so this is a no-op there.)
         microCutoff = 0;
         // Mirror agent-core applyCompaction() → _tokenCount = result.tokensAfter:
         // the live context-window fill is now the post-compaction count. Derived
@@ -577,16 +590,6 @@ function isHistoryEntry(pm: ProjectedMessage): boolean {
   return pm.source !== 'undo' && pm.source !== 'clear';
 }
 
-/** Mirrors agent-core `isRealUserPrompt` (`agent/context/index.ts`): a message
- *  counts toward an undo only if it is a genuine user prompt. */
-function isRealUserPrompt(message: ContextMessage): boolean {
-  if (message.role !== 'user') return false;
-  const origin = message.origin;
-  if (origin === undefined || origin.kind === 'user') return true;
-  if (origin.kind === 'skill_activation') return origin.trigger === 'user-slash';
-  return false;
-}
-
 /** Single source of truth for the `context.undo` backward walk, shared by both
  *  projection modes. Mirrors agent-core `undo` (`agent/context/index.ts`): walk
  *  from the end, skip `origin.kind === 'injection'` (those are KEPT even when
@@ -612,7 +615,7 @@ function computeUndoCutoff(
     if (origin?.kind === 'compaction_summary') break; // stop
     removedMessageCount++;
     cutoff = i;
-    if (isRealUserPrompt(messages[i]!.message) && ++removedUserCount >= count) break;
+    if (isRealUserInput(messages[i]!.message) && ++removedUserCount >= count) break;
   }
   return { cutoff, removedMessageCount };
 }
diff --git a/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl b/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl
index 317df60b2..9f44d9a7d 100644
--- a/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl
+++ b/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl
@@ -1,5 +1,6 @@
 {"type":"metadata","protocol_version":"1.1","created_at":1779256791085}
 {"type":"config.update","cwd":"/tmp/work","profileName":"agent","systemPrompt":"You are Kimi.","time":1779256791100}
 {"type":"context.append_message","message":{"role":"user","content":[{"type":"text","text":"before compaction"}],"toolCalls":[]},"time":1779256800001}
-{"type":"context.apply_compaction","summary":"compacted summary","compactedCount":1,"tokensBefore":100,"tokensAfter":30,"time":1779256800500}
+{"type":"context.append_message","message":{"role":"assistant","content":[{"type":"text","text":"assistant reply"}],"toolCalls":[]},"time":1779256800200}
+{"type":"context.apply_compaction","summary":"compacted summary","compactedCount":2,"tokensBefore":100,"tokensAfter":30,"time":1779256800500}
 {"type":"context.append_message","message":{"role":"user","content":[{"type":"text","text":"after compaction"}],"toolCalls":[]},"time":1779256801000}
diff --git a/apps/vis/server/test/lib/context-projector.test.ts b/apps/vis/server/test/lib/context-projector.test.ts
index d2a2d3f4c..a4be1196e 100644
--- a/apps/vis/server/test/lib/context-projector.test.ts
+++ b/apps/vis/server/test/lib/context-projector.test.ts
@@ -262,33 +262,41 @@ describe('context-projector', () => {
       { lineNo: 4, data: { type: 'context.append_message' as const, message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'new' }], toolCalls: [] } }, raw: {} },
     ];
     const proj = projectContext(entries as any);
-    expect(proj.messages[0]!.source).toBe('compaction_summary');
-    // Compaction summary is an assistant message (agent-core's own
+    // Model view: the kept user prompt + user-role summary + the new prompt.
+    expect(proj.messages.map((m) => m.source)).toEqual([
+      'append_message', 'compaction_summary', 'append_message',
+    ]);
+    expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'old' });
+    // The compaction summary is a user message (agent-core's own
     // representation), not a synthetic system message.
-    expect(proj.messages[0]!.message.role).toBe('assistant');
-    expect(proj.messages[0]!.message.origin).toEqual({ kind: 'compaction_summary' });
-    expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'old stuff' });
-    expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'new' });
+    expect(proj.messages[1]!.message.role).toBe('user');
+    expect(proj.messages[1]!.message.origin).toEqual({ kind: 'compaction_summary' });
+    expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'old stuff' });
+    expect(proj.messages[2]!.message.content[0]).toMatchObject({ text: 'new' });
   });
 
-  it('apply_compaction keeps the post-compaction tail (slice(compactedCount))', () => {
+  it('apply_compaction keeps the most recent user messages and drops the assistant/tool tail', () => {
     const entries = [
       { lineNo: 1, data: { type: 'context.append_message' as const,
           message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'm0' }], toolCalls: [] } }, raw: {} },
       { lineNo: 2, data: { type: 'context.append_message' as const,
           message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'm1' }], toolCalls: [] } }, raw: {} },
       { lineNo: 3, data: { type: 'context.append_message' as const,
-          message: { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'm2 (kept)' }], toolCalls: [] } }, raw: {} },
+          message: { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'm2 (dropped)' }], toolCalls: [] } }, raw: {} },
       { lineNo: 4, data: { type: 'context.apply_compaction' as const,
-          summary: 'sum', compactedCount: 2, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
+          summary: 'sum', compactedCount: 3, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
     ];
     const proj = projectContext(entries as any);
-    // [summary, m2] — m0 and m1 (the first compactedCount=2) are dropped, m2 kept.
-    expect(proj.messages).toHaveLength(2);
-    expect(proj.messages[0]!.source).toBe('compaction_summary');
-    expect(proj.messages[0]!.compaction).toEqual({ compactedCount: 2, tokensBefore: 100, tokensAfter: 10 });
-    expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'm2 (kept)' });
-    expect(proj.messages[1]!.lineNo).toBe(3);
+    // [m0, m1, summary] — real user prompts are kept verbatim, the assistant
+    // tail is dropped.
+    expect(proj.messages).toHaveLength(3);
+    expect(proj.messages.map((m) => m.source)).toEqual([
+      'append_message', 'append_message', 'compaction_summary',
+    ]);
+    expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'm0' });
+    expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'm1' });
+    expect(proj.messages[2]!.compaction).toEqual({ compactedCount: 3, tokensBefore: 100, tokensAfter: 10 });
+    expect(proj.messages[2]!.message.content[0]).toMatchObject({ text: 'sum' });
   });
 
   // ---- Fix ④: UI-only markers must not offset agent-core history indices ------
@@ -298,7 +306,7 @@ describe('context-projector', () => {
   // real history entries (append_message + compaction_summary), skipping
   // 'undo'/'clear' markers.
 
-  it('apply_compaction slices by history index, skipping a preceding undo marker (model)', () => {
+  it('apply_compaction keeps user messages across a preceding undo marker (model)', () => {
     const userMsg = (text: string) => ({
       role: 'user' as const, content: [{ type: 'text' as const, text }], toolCalls: [],
       origin: { kind: 'user' as const },
@@ -306,14 +314,10 @@ describe('context-projector', () => {
     // Step 1: append u1, u2 then undo(1) → removes u2, leaves [u1, <undo marker>].
     // Step 2: append u3, u4 → array is [u1, <undo marker>, u3, u4].
     // History entries (agent-core _history, which has NO marker) are the three
-    // real messages [u1, u3, u4]. A compaction with compactedCount=2 drops the
-    // first 2 HISTORY entries (u1, u3) — and the undo marker that sits within
-    // that compacted prefix is dropped with it — keeping exactly [summary, u4].
-    //
-    // The naive `messages.slice(compactedCount=2)` would instead cut the ARRAY at
-    // index 2, yielding [summary, u3, u4] — it WRONGLY retains the already-
-    // compacted u3 because the undo marker offset the index by one. This test
-    // pins the correct history-aware behaviour and FAILS against the naive slice.
+    // real user prompts [u1, u3, u4]. Compaction keeps all of them (they fit the
+    // budget) and appends the summary, dropping only the synthetic undo marker.
+    // This pins that the marker does not offset the kept-user selection — a naive
+    // array-slice would have retained the wrong prompts.
     const entries = [
       { lineNo: 1, data: { type: 'context.append_message' as const, message: userMsg('u1') }, raw: {} },
       { lineNo: 2, data: { type: 'context.append_message' as const, message: userMsg('u2') }, raw: {} },
@@ -321,12 +325,16 @@ describe('context-projector', () => {
       { lineNo: 4, data: { type: 'context.append_message' as const, message: userMsg('u3') }, raw: {} },
       { lineNo: 5, data: { type: 'context.append_message' as const, message: userMsg('u4') }, raw: {} },
       { lineNo: 6, data: { type: 'context.apply_compaction' as const,
-          summary: 'sum', compactedCount: 2, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
+          summary: 'sum', compactedCount: 3, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
     ];
     const proj = projectContext(entries as any);
-    // Correct: [summary, u4]. The marker and the first 2 history entries are gone.
-    expect(proj.messages.map((m) => m.source)).toEqual(['compaction_summary', 'append_message']);
-    expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'u4' });
+    // Correct: [u1, u3, u4, summary]. The marker is gone, all real prompts kept.
+    expect(proj.messages.map((m) => m.source)).toEqual([
+      'append_message', 'append_message', 'append_message', 'compaction_summary',
+    ]);
+    expect(proj.messages.map((m) => m.message.content[0])).toMatchObject([
+      { text: 'u1' }, { text: 'u3' }, { text: 'u4' }, { text: 'sum' },
+    ]);
   });
 
   it('micro-blanking uses the history index, skipping a preceding undo marker (model)', () => {
@@ -675,7 +683,7 @@ describe('context-projector', () => {
   // marker but do NOT mutate/drop the surrounding message list. 'model' mode
   // (the default) keeps the existing model's-eye behaviour byte-identical.
 
-  it("defaults to 'model' mode when no 2nd arg is passed (compaction drops the prefix)", () => {
+  it("defaults to 'model' mode when no 2nd arg is passed (keeps recent user messages + summary)", () => {
     const entries = [
       { lineNo: 1, data: { type: 'context.append_message' as const,
           message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'm0' }], toolCalls: [] } }, raw: {} },
@@ -684,10 +692,14 @@ describe('context-projector', () => {
       { lineNo: 3, data: { type: 'context.apply_compaction' as const,
           summary: 'sum', compactedCount: 2, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
     ];
-    // No 2nd arg → 'model' default: prefix dropped, only the summary remains.
+    // No 2nd arg → 'model' default: the real user prompts are kept verbatim and
+    // the summary is appended after them.
     const proj = projectContext(entries as any);
-    expect(proj.messages).toHaveLength(1);
-    expect(proj.messages[0]!.source).toBe('compaction_summary');
+    expect(proj.messages.map((m) => m.source)).toEqual([
+      'append_message', 'append_message', 'compaction_summary',
+    ]);
+    expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'm0' });
+    expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'm1' });
   });
 
   it("full mode keeps the pre-compaction messages plus the summary marker plus the tail", () => {
diff --git a/apps/vis/server/test/routes/context.test.ts b/apps/vis/server/test/routes/context.test.ts
index 486e6175d..6352747e9 100644
--- a/apps/vis/server/test/routes/context.test.ts
+++ b/apps/vis/server/test/routes/context.test.ts
@@ -69,28 +69,31 @@ describe('context route', () => {
     cleanup = c;
     const app = contextRoute(home);
 
-    // Default (model view): the pre-compaction message is dropped, leaving
-    // [summary, after-compaction].
+    // Default (model view): the real user prompt before compaction is KEPT, the
+    // assistant reply is dropped, then the summary, then the post-compaction tail.
     const modelRes = await app.request('/session_fixture/context?agent=main');
     expect(modelRes.status).toBe(200);
     const modelBody = (await modelRes.json()) as {
       messages: { source: string; message: { content: { type: string; text?: string }[] } }[];
     };
     expect(modelBody.messages.map((m) => m.source)).toEqual([
-      'compaction_summary', 'append_message',
+      'append_message', 'compaction_summary', 'append_message',
     ]);
+    expect(modelBody.messages[0]!.message.content[0]).toMatchObject({ text: 'before compaction' });
+    expect(modelBody.messages[2]!.message.content[0]).toMatchObject({ text: 'after compaction' });
 
-    // Full history: the pre-compaction message is KEPT, then the summary marker,
-    // then the post-compaction tail.
+    // Full history: every pre-compaction message (user prompt + assistant reply)
+    // is KEPT, then the summary marker, then the post-compaction tail.
     const fullRes = await app.request('/session_fixture/context?agent=main&history=full');
     expect(fullRes.status).toBe(200);
     const fullBody = (await fullRes.json()) as {
       messages: { source: string; message: { content: { type: string; text?: string }[] } }[];
     };
     expect(fullBody.messages.map((m) => m.source)).toEqual([
-      'append_message', 'compaction_summary', 'append_message',
+      'append_message', 'append_message', 'compaction_summary', 'append_message',
     ]);
     expect(fullBody.messages[0]!.message.content[0]).toMatchObject({ text: 'before compaction' });
-    expect(fullBody.messages[2]!.message.content[0]).toMatchObject({ text: 'after compaction' });
+    expect(fullBody.messages[1]!.message.content[0]).toMatchObject({ text: 'assistant reply' });
+    expect(fullBody.messages[3]!.message.content[0]).toMatchObject({ text: 'after compaction' });
   });
 });
diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts
index 1408f79f5..642623c1c 100644
--- a/packages/agent-core/src/agent/compaction/full.ts
+++ b/packages/agent-core/src/agent/compaction/full.ts
@@ -42,23 +42,10 @@ import {
   DefaultCompactionStrategy,
   type CompactionStrategy,
 } from './strategy';
-import {
-  COMPACT_USER_MESSAGE_MAX_TOKENS,
-  buildCompactionSummaryText,
-  collectCompactableUserMessages,
-  selectRecentUserMessages,
-} from './memento';
+import { buildCompactionSummaryText } from './memento';
 
 export const MAX_COMPACTION_RETRY_ATTEMPTS = 5;
 
-// Consecutive provider-overflow recoveries (overflow -> compact -> overflow
-// again) allowed in a single turn before we give up. Each successful step
-// resets the counter, so this only trips when compaction stops reducing the
-// request below the model window — i.e. the compacted floor itself no longer
-// fits. Without this cap the turn loop can compact forever on a small or
-// observed-to-be-small context window.
-const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
-
 const DEFAULT_COMPACTION_MAX_COMPLETION_TOKENS = 128 * 1024;
 const OVERFLOW_CONTEXT_SAFETY_RATIO = 0.85;
 const OVERFLOW_STATUS_RECOVERY_RATIO = 0.5;
@@ -227,10 +214,11 @@ export class FullCompaction {
 
   async handleOverflowError(signal: AbortSignal, error: unknown) {
     this.consecutiveOverflowCompactions += 1;
-    if (this.consecutiveOverflowCompactions > MAX_OVERFLOW_COMPACTION_ATTEMPTS) {
+    const maxAttempts = this.strategy.maxOverflowCompactionAttempts;
+    if (this.consecutiveOverflowCompactions > maxAttempts) {
       throw new KimiError(
         ErrorCodes.CONTEXT_OVERFLOW,
-        `Compaction failed to bring the context under the model window after ${String(MAX_OVERFLOW_COMPACTION_ATTEMPTS)} attempts.`,
+        `Compaction failed to bring the context under the model window after ${String(maxAttempts)} attempts.`,
         { cause: error instanceof Error ? error : undefined },
       );
     }
@@ -459,18 +447,11 @@ export class FullCompaction {
       }
 
       const summaryText = buildCompactionSummaryText(this.postProcessSummary(summary ?? ''));
-      const keptUserMessages = selectRecentUserMessages(
-        collectCompactableUserMessages(originalHistory),
-        COMPACT_USER_MESSAGE_MAX_TOKENS,
-      );
-      const tokensAfter = estimateTokens(summaryText) + estimateTokensForMessages(keptUserMessages);
-
-      const result: CompactionResult = {
+      const result = this.agent.context.applyCompaction({
         summary: summaryText,
         compactedCount: originalHistory.length,
         tokensBefore,
-        tokensAfter,
-      };
+      });
 
       this.agent.telemetry.track('compaction_finished', {
         tokensBefore: result.tokensBefore,
@@ -483,7 +464,6 @@ export class FullCompaction {
         ...usage,
         ...data,
       });
-      this.agent.context.applyCompaction(result);
       this.lastCompactedTokenCount = result.tokensAfter;
       return result;
     } catch (error) {
diff --git a/packages/agent-core/src/agent/compaction/strategy.ts b/packages/agent-core/src/agent/compaction/strategy.ts
index 9fb8e8b41..d409d6e8d 100644
--- a/packages/agent-core/src/agent/compaction/strategy.ts
+++ b/packages/agent-core/src/agent/compaction/strategy.ts
@@ -9,10 +9,16 @@ export interface CompactionConfig {
   reservedContextSize: number;
   /** Maximum number of auto-compactions allowed in a single turn. */
   maxCompactionPerTurn: number;
+  /**
+   * Consecutive provider-overflow recoveries (overflow -> compact -> overflow
+   * again) allowed in a single turn before giving up. Caps the loop when
+   * compaction can no longer shrink the request below the model window.
+   */
+  maxOverflowCompactionAttempts: number;
 }
 
 /**
- * Auto-compact at 90% of the resolved context window. `blockRatio` matches
+ * Auto-compact at 85% of the resolved context window. `blockRatio` matches
  * `triggerRatio` so compaction runs synchronously with no background
  * compaction.
  */
@@ -21,6 +27,7 @@ export const DEFAULT_COMPACTION_CONFIG: CompactionConfig = {
   blockRatio: 0.85,
   reservedContextSize: 50_000,
   maxCompactionPerTurn: Infinity,
+  maxOverflowCompactionAttempts: 3,
 };
 
 export interface CompactionStrategy {
@@ -28,6 +35,7 @@ export interface CompactionStrategy {
   shouldBlock(usedSize: number): boolean;
   readonly checkAfterStep: boolean;
   readonly maxCompactionPerTurn: number;
+  readonly maxOverflowCompactionAttempts: number;
 }
 
 export class DefaultCompactionStrategy implements CompactionStrategy {
@@ -68,6 +76,10 @@ export class DefaultCompactionStrategy implements CompactionStrategy {
   get maxCompactionPerTurn(): number {
     return this.config.maxCompactionPerTurn;
   }
+
+  get maxOverflowCompactionAttempts(): number {
+    return this.config.maxOverflowCompactionAttempts;
+  }
 }
 
 export type { CompactionSource };
diff --git a/packages/agent-core/src/agent/compaction/types.ts b/packages/agent-core/src/agent/compaction/types.ts
index 820365cdc..511782b7b 100644
--- a/packages/agent-core/src/agent/compaction/types.ts
+++ b/packages/agent-core/src/agent/compaction/types.ts
@@ -3,8 +3,20 @@ export interface CompactionResult {
   compactedCount: number;
   tokensBefore: number;
   tokensAfter: number;
+  /**
+   * Number of real user messages kept verbatim ahead of the summary in the
+   * post-compaction live context. Written by `ContextMemory.applyCompaction`
+   * (the single derivation point for the post-compaction shape) so the
+   * wire-transcript reducer can reproduce the live folded length without
+   * re-deriving it from the full transcript. Optional for backward
+   * compatibility with older wire records.
+   */
+  keptUserMessageCount?: number;
 }
 
+/** Inputs `ContextMemory.applyCompaction` needs to derive a `CompactionResult`. */
+export type CompactionInput = Pick<CompactionResult, 'summary' | 'compactedCount' | 'tokensBefore'>;
+
 export type CompactionSource = 'manual' | 'auto';
 
 export interface CompactionBeginData {
diff --git a/packages/agent-core/src/agent/context/index.ts b/packages/agent-core/src/agent/context/index.ts
index 568c651ec..e94f6d492 100644
--- a/packages/agent-core/src/agent/context/index.ts
+++ b/packages/agent-core/src/agent/context/index.ts
@@ -3,12 +3,13 @@ import { createToolMessage, type ContentPart, type Message } from '@moonshot-ai/
 import type { Agent } from '..';
 import { ErrorCodes, KimiError } from '../../errors';
 import type { ExecutableToolResult, LoopRecordedEvent } from '../../loop';
-import { estimateTokensForMessages } from '../../utils/tokens';
+import { estimateTokens, estimateTokensForMessages } from '../../utils/tokens';
 import { escapeXml } from '../../utils/xml-escape';
 import {
   COMPACT_USER_MESSAGE_MAX_TOKENS,
   collectCompactableUserMessages,
   selectRecentUserMessages,
+  type CompactionInput,
   type CompactionResult,
 } from '../compaction';
 import { project, trimTrailingOpenToolExchange } from './projector';
@@ -210,7 +211,27 @@ export class ContextMemory {
     }
   }
 
-  applyCompaction(result: CompactionResult): void {
+  applyCompaction(input: CompactionInput): CompactionResult {
+    // Single derivation point for the post-compaction shape: the most recent
+    // real user messages (verbatim, within the token budget) followed by a
+    // user-role summary. `tokensAfter` and `keptUserMessageCount` are derived
+    // here from the actual `_history` so the live context, the wire record,
+    // and the transcript reducer all agree — re-deriving them elsewhere (e.g.
+    // from the full transcript, which still holds the untruncated originals of
+    // messages the live context truncated) would diverge.
+    const keptUserMessages = selectRecentUserMessages(
+      collectCompactableUserMessages(this._history),
+      COMPACT_USER_MESSAGE_MAX_TOKENS,
+    );
+    const tokensAfter =
+      estimateTokens(input.summary) + estimateTokensForMessages(keptUserMessages);
+    const result: CompactionResult = {
+      summary: input.summary,
+      compactedCount: input.compactedCount,
+      tokensBefore: input.tokensBefore,
+      tokensAfter,
+      keptUserMessageCount: keptUserMessages.length,
+    };
     this.agent.records.logRecord({
       type: 'context.apply_compaction',
       ...result,
@@ -221,12 +242,9 @@ export class ContextMemory {
         compactedCount: result.compactedCount,
         tokensBefore: result.tokensBefore,
         tokensAfter: result.tokensAfter,
+        keptUserMessageCount: result.keptUserMessageCount,
       },
     });
-    const keptUserMessages = selectRecentUserMessages(
-      collectCompactableUserMessages(this._history),
-      COMPACT_USER_MESSAGE_MAX_TOKENS,
-    );
     this._history = [
       ...keptUserMessages,
       {
@@ -244,8 +262,9 @@ export class ContextMemory {
     this._tokenCount = result.tokensAfter;
     this.tokenCountCoveredMessageCount = this._history.length;
     this.agent.microCompaction.reset();
-    this.agent.injection.onContextCompacted(result.compactedCount);
+    this.agent.injection.onContextCompacted();
     this.agent.emitStatusUpdated();
+    return result;
   }
 
   data(): AgentContextData {
diff --git a/packages/agent-core/src/agent/injection/injector.ts b/packages/agent-core/src/agent/injection/injector.ts
index 504e412de..d13e18159 100644
--- a/packages/agent-core/src/agent/injection/injector.ts
+++ b/packages/agent-core/src/agent/injection/injector.ts
@@ -9,11 +9,8 @@ export abstract class DynamicInjector {
     this.injectedAt = null;
   }
 
-  onContextCompacted(compactedCount: number): void {
-    if (this.injectedAt !== null) {
-      const newInjectedAt = this.injectedAt - compactedCount + 1;
-      this.injectedAt = newInjectedAt >= 0 ? newInjectedAt : null;
-    }
+  onContextCompacted(): void {
+    this.injectedAt = null;
   }
 
   onContextMessageRemoved(index: number): void {
diff --git a/packages/agent-core/src/agent/injection/manager.ts b/packages/agent-core/src/agent/injection/manager.ts
index 009c3d682..7103f2cdd 100644
--- a/packages/agent-core/src/agent/injection/manager.ts
+++ b/packages/agent-core/src/agent/injection/manager.ts
@@ -51,10 +51,10 @@ export class InjectionManager {
     }
   }
 
-  onContextCompacted(compactedCount: number): void {
+  onContextCompacted(): void {
     for (const injector of this.lifecycleInjectors()) {
       try {
-        injector.onContextCompacted(compactedCount);
+        injector.onContextCompacted();
       } catch {
         continue;
       }
diff --git a/packages/agent-core/src/agent/injection/permission-mode.ts b/packages/agent-core/src/agent/injection/permission-mode.ts
index 9ba024b15..ffe5389ad 100644
--- a/packages/agent-core/src/agent/injection/permission-mode.ts
+++ b/packages/agent-core/src/agent/injection/permission-mode.ts
@@ -17,7 +17,7 @@ export class PermissionModeInjector extends DynamicInjector {
   private lastMode: PermissionMode | undefined;
   private refreshAfterCompaction = false;
 
-  override onContextCompacted(_compactedCount: number): void {
+  override onContextCompacted(): void {
     this.injectedAt = null;
     this.refreshAfterCompaction = true;
   }
diff --git a/packages/agent-core/src/agent/injection/plan-mode.ts b/packages/agent-core/src/agent/injection/plan-mode.ts
index 209351db4..bbc0d557e 100644
--- a/packages/agent-core/src/agent/injection/plan-mode.ts
+++ b/packages/agent-core/src/agent/injection/plan-mode.ts
@@ -22,10 +22,6 @@ export class PlanModeInjector extends DynamicInjector {
     this.wasActive = this.agent.planMode.isActive;
   }
 
-  override onContextCompacted(): void {
-    this.injectedAt = null;
-  }
-
   override async getInjection(): Promise<string | undefined> {
     const { isActive, planFilePath } = this.agent.planMode;
     if (!isActive) {
diff --git a/packages/agent-core/src/agent/turn/index.ts b/packages/agent-core/src/agent/turn/index.ts
index df115b6d5..9803b03b9 100644
--- a/packages/agent-core/src/agent/turn/index.ts
+++ b/packages/agent-core/src/agent/turn/index.ts
@@ -662,9 +662,14 @@ export class TurnFlow {
           },
           hooks: {
             beforeStep: async ({ signal: stepSignal }) => {
-              this.flushSteerBuffer();
               this.agent.microCompaction.detect();
               await this.agent.fullCompaction.beforeStep(stepSignal);
+              // Flush steered messages (background-task / cron notifications,
+              // user interrupts) AFTER compaction so they land in the
+              // post-compaction context instead of being dropped by it:
+              // compaction keeps only genuine user prompts and discards these
+              // origins, and they are not re-injected later.
+              this.flushSteerBuffer();
               await this.agent.injection.inject();
               deduper.beginStep();
               return;
diff --git a/packages/agent-core/src/index.ts b/packages/agent-core/src/index.ts
index 14dcec22a..ae63a8604 100644
--- a/packages/agent-core/src/index.ts
+++ b/packages/agent-core/src/index.ts
@@ -62,6 +62,12 @@ export type {
 export { AGENT_WIRE_PROTOCOL_VERSION } from './agent/records';
 export type { AgentConfigUpdateData } from './agent/config';
 export type { CompactionBeginData, CompactionResult } from './agent/compaction';
+export {
+  COMPACT_USER_MESSAGE_MAX_TOKENS,
+  collectCompactableUserMessages,
+  isRealUserInput,
+  selectRecentUserMessages,
+} from './agent/compaction';
 export type {
   PermissionApprovalResultRecord,
   PermissionMode,
diff --git a/packages/agent-core/src/services/message/transcript.ts b/packages/agent-core/src/services/message/transcript.ts
index 7961be9b2..b350c7efb 100644
--- a/packages/agent-core/src/services/message/transcript.ts
+++ b/packages/agent-core/src/services/message/transcript.ts
@@ -48,6 +48,7 @@ import type { ExecutableToolResult, LoopRecordedEvent } from '../../loop';
 import {
   COMPACT_USER_MESSAGE_MAX_TOKENS,
   collectCompactableUserMessages,
+  isRealUserInput,
   selectRecentUserMessages,
 } from '../../agent/compaction';
 
@@ -217,7 +218,7 @@ export function reduceWireRecords(records: Iterable<AgentRecord>): {
       if (message.origin?.kind === 'compaction_summary') break;
       transcript.splice(i, 1);
       foldedLength = Math.max(0, foldedLength - 1);
-      if (isRealUserPrompt(message)) {
+      if (isRealUserInput(message)) {
         removedUserCount++;
         if (removedUserCount >= count) break;
       }
@@ -247,10 +248,6 @@ export function reduceWireRecords(records: Iterable<AgentRecord>): {
         // most recent user messages followed by a user-role summary. The
         // transcript keeps the full history and appends the summary marker;
         // foldedLength tracks the post-compaction live context length.
-        const keptUserMessages = selectRecentUserMessages(
-          collectCompactableUserMessages(transcript.map((entry) => entry.message)),
-          COMPACT_USER_MESSAGE_MAX_TOKENS,
-        );
         transcript.push({
           message: {
             role: 'user',
@@ -260,7 +257,22 @@ export function reduceWireRecords(records: Iterable<AgentRecord>): {
           },
           time: record.time,
         });
-        foldedLength = keptUserMessages.length + 1;
+        // Prefer the kept-user count recorded by the live
+        // ContextMemory.applyCompaction. Re-deriving it from the full
+        // transcript would diverge from the live context: the transcript still
+        // holds the untruncated originals of messages the live context may
+        // have truncated, and (after a clear) messages the live context no
+        // longer has. Only fall back to re-deriving for legacy wire records
+        // that predate the field.
+        if (record.keptUserMessageCount !== undefined) {
+          foldedLength = record.keptUserMessageCount + 1;
+        } else {
+          const keptUserMessages = selectRecentUserMessages(
+            collectCompactableUserMessages(transcript.map((entry) => entry.message)),
+            COMPACT_USER_MESSAGE_MAX_TOKENS,
+          );
+          foldedLength = keptUserMessages.length + 1;
+        }
         // Drop any open tool exchange and deferred messages exactly like
         // ContextMemory.applyCompaction: late tool results become orphans and
         // deferred injections are not rebuilt, so pending ids must not strand
@@ -284,17 +296,6 @@ export function reduceWireRecords(records: Iterable<AgentRecord>): {
   return { entries: transcript as TranscriptEntry[], foldedLength };
 }
 
-/** Mirrors agent-core's `isRealUserPrompt` (context undo accounting). */
-function isRealUserPrompt(message: MutableMessage): boolean {
-  if (message.role !== 'user') return false;
-  const origin = message.origin;
-  if (origin === undefined || origin.kind === 'user') return true;
-  if (origin.kind === 'skill_activation') {
-    return origin.trigger === 'user-slash';
-  }
-  return false;
-}
-
 /** Mirrors agent-core's `toolResultOutputForModel` + `createToolMessage`. */
 function toolResultContent(result: ExecutableToolResult): ContentPart[] {
   const output = result.output;
diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts
index ad5d7c817..d86d8b230 100644
--- a/packages/agent-core/test/agent/compaction/full.test.ts
+++ b/packages/agent-core/test/agent/compaction/full.test.ts
@@ -2138,6 +2138,7 @@ const alwaysCompactOnce: CompactionStrategy = {
   shouldBlock: () => true,
   checkAfterStep: true,
   maxCompactionPerTurn: 1,
+  maxOverflowCompactionAttempts: 3,
 };
 
 function missingToolCall(): ToolCall {
@@ -2155,6 +2156,7 @@ function overflowOnlyCompactionStrategy(maxSize: number = 14): DefaultCompaction
     blockRatio: Infinity,
     reservedContextSize: 0,
     maxCompactionPerTurn: 3,
+    maxOverflowCompactionAttempts: 3,
   });
 }
 
diff --git a/packages/agent-core/test/agent/compaction/memento.test.ts b/packages/agent-core/test/agent/compaction/memento.test.ts
index 7591e3207..9ccd9d49c 100644
--- a/packages/agent-core/test/agent/compaction/memento.test.ts
+++ b/packages/agent-core/test/agent/compaction/memento.test.ts
@@ -107,7 +107,7 @@ describe('selectRecentUserMessages', () => {
     expect(selected).toHaveLength(2);
     expect(messageText(selected[1]!)).toBe('recent');
     expect(estimateTokens(truncated)).toBeLessThanOrEqual(333);
-    expect([...truncated].every((char) => char === '😀')).toBe(true);
+    expect(/^(?:😀)*$/u.test(truncated)).toBe(true);
     expect(truncated.length % 2).toBe(0);
   });
 
diff --git a/packages/agent-core/test/agent/compaction/strategy.test.ts b/packages/agent-core/test/agent/compaction/strategy.test.ts
index e87aa4554..84422eb4e 100644
--- a/packages/agent-core/test/agent/compaction/strategy.test.ts
+++ b/packages/agent-core/test/agent/compaction/strategy.test.ts
@@ -42,6 +42,7 @@ describe('DefaultCompactionStrategy', () => {
       blockRatio: 0.9,
       reservedContextSize: 50_000,
       maxCompactionPerTurn: 3,
+      maxOverflowCompactionAttempts: 3,
     });
 
     expect(strategy.shouldCompact(1)).toBe(false);
@@ -64,6 +65,7 @@ describe('DefaultCompactionStrategy', () => {
       blockRatio: 0.9,
       reservedContextSize: 0,
       maxCompactionPerTurn: 3,
+      maxOverflowCompactionAttempts: 3,
     });
 
     expect(strategy.checkAfterStep).toBe(true);
@@ -82,5 +84,6 @@ function testCompactionStrategy(maxSize: number = 1_000): DefaultCompactionStrat
     blockRatio: 0.85,
     reservedContextSize: 0,
     maxCompactionPerTurn: 3,
+    maxOverflowCompactionAttempts: 3,
   });
 }
diff --git a/packages/agent-core/test/agent/context.test.ts b/packages/agent-core/test/agent/context.test.ts
index cb603fe1c..99ced1192 100644
--- a/packages/agent-core/test/agent/context.test.ts
+++ b/packages/agent-core/test/agent/context.test.ts
@@ -578,7 +578,6 @@ describe('Agent context', () => {
       summary: 'summary of old prompt',
       compactedCount: 4,
       tokensBefore: 100,
-      tokensAfter: 40,
     });
     ctx.agent.context.appendSystemReminder('second reminder', {
       kind: 'injection',
@@ -645,7 +644,6 @@ describe('Agent context', () => {
       summary: 'summary of old context',
       compactedCount: 1,
       tokensBefore: 100,
-      tokensAfter: 20,
     });
     expect(ctx.agent.context.history.at(-1)?.origin).toEqual({ kind: 'compaction_summary' });
 
@@ -810,7 +808,6 @@ describe('Agent context', () => {
       summary: 'summary of compacted context',
       compactedCount: 1,
       tokensBefore: 100,
-      tokensAfter: 20,
     });
     ctx.agent.context.appendUserMessage([{ type: 'text', text: 'recent user message' }]);
     ctx.agent.context.appendMessage({
@@ -854,7 +851,6 @@ describe('Agent context', () => {
       summary: 'summary of compacted context',
       compactedCount: 1,
       tokensBefore: 100,
-      tokensAfter: 20,
     });
     ctx.agent.context.appendUserMessage([{ type: 'text', text: 'recent user message' }]);
     ctx.agent.context.appendMessage({
diff --git a/packages/agent-core/test/agent/injection/manager.test.ts b/packages/agent-core/test/agent/injection/manager.test.ts
index a8a91ea93..e5c91740e 100644
--- a/packages/agent-core/test/agent/injection/manager.test.ts
+++ b/packages/agent-core/test/agent/injection/manager.test.ts
@@ -15,9 +15,9 @@ class RecordingInjector extends DynamicInjector {
     super.onContextClear();
   }
 
-  override onContextCompacted(compactedCount: number): void {
+  override onContextCompacted(): void {
     this.compactionCalls += 1;
-    super.onContextCompacted(compactedCount);
+    super.onContextCompacted();
   }
 
   protected override getInjection(): string | undefined {
@@ -28,7 +28,7 @@ class RecordingInjector extends DynamicInjector {
 class BoomInjector extends DynamicInjector {
   override readonly injectionVariant = 'boom_test';
 
-  override onContextCompacted(_compactedCount: number): void {
+  override onContextCompacted(): void {
     throw new Error('boom-compact');
   }
 
@@ -49,7 +49,7 @@ describe('InjectionManager.onContextCompacted', () => {
     const b = new RecordingInjector(ctx.agent);
     installInjectors(ctx.agent.injection, [a, b]);
 
-    ctx.agent.injection.onContextCompacted(3);
+    ctx.agent.injection.onContextCompacted();
 
     expect(a.compactionCalls).toBe(1);
     expect(b.compactionCalls).toBe(1);
@@ -62,7 +62,7 @@ describe('InjectionManager.onContextCompacted', () => {
     installInjectors(ctx.agent.injection, [new BoomInjector(ctx.agent), recorder]);
 
     expect(() => {
-      ctx.agent.injection.onContextCompacted(2);
+      ctx.agent.injection.onContextCompacted();
     }).not.toThrow();
     expect(recorder.compactionCalls).toBe(1);
   });
@@ -74,11 +74,11 @@ describe('InjectionManager.onContextCompacted', () => {
     installInjectors(ctx.agent.injection, [new BoomInjector(ctx.agent), recorder]);
 
     expect(() => {
-      ctx.agent.injection.onContextCompacted(1);
+      ctx.agent.injection.onContextCompacted();
     }).not.toThrow();
     expect(recorder.compactionCalls).toBe(1);
 
-    ctx.agent.injection.onContextCompacted(1);
+    ctx.agent.injection.onContextCompacted();
     expect(recorder.compactionCalls).toBe(2);
   });
 
diff --git a/packages/agent-core/test/agent/permission.test.ts b/packages/agent-core/test/agent/permission.test.ts
index 89c7d92ad..3e3a75d6e 100644
--- a/packages/agent-core/test/agent/permission.test.ts
+++ b/packages/agent-core/test/agent/permission.test.ts
@@ -286,7 +286,7 @@ describe('Permission auto mode', () => {
 
     await injector.inject();
     appendSystemReminder.mockClear();
-    injector.onContextCompacted(1);
+    injector.onContextCompacted();
     await injector.inject();
 
     expect(appendSystemReminder).toHaveBeenCalledWith(
@@ -306,7 +306,7 @@ describe('Permission auto mode', () => {
 
     await injector.inject();
     appendSystemReminder.mockClear();
-    injector.onContextCompacted(1);
+    injector.onContextCompacted();
     permission.mode = 'manual';
     await injector.inject();
 
diff --git a/packages/agent-core/test/services/message-transcript.test.ts b/packages/agent-core/test/services/message-transcript.test.ts
index c17f50b51..b90cd4b31 100644
--- a/packages/agent-core/test/services/message-transcript.test.ts
+++ b/packages/agent-core/test/services/message-transcript.test.ts
@@ -127,6 +127,30 @@ describe('reduceWireRecords', () => {
     expect(foldedLength).toBe(3);
   });
 
+  it('uses the recorded kept-user count for foldedLength when present', () => {
+    // The live context kept only the most recent real user message (e.g. the
+    // older ones were truncated in a prior compaction, or a clear dropped
+    // them). The full transcript still holds all three, so re-deriving from
+    // it would yield 3 and disagree with the live context. The reducer must
+    // trust the count recorded by ContextMemory.applyCompaction.
+    const { foldedLength } = reduceWireRecords([
+      appendMessage(userMessage('u1')),
+      appendMessage(userMessage('u2')),
+      appendMessage(userMessage('u3')),
+      {
+        type: 'context.apply_compaction',
+        summary: 'SUM',
+        compactedCount: 3,
+        tokensBefore: 100,
+        tokensAfter: 20,
+        keptUserMessageCount: 1,
+      } as AgentRecord,
+      appendMessage(userMessage('u4')),
+    ]);
+    // 1 kept user message + summary + u4 appended after compaction.
+    expect(foldedLength).toBe(3);
+  });
+
   it('drops a late tool result after compaction closes an open exchange', () => {
     const { entries, foldedLength } = reduceWireRecords([
       appendMessage(userMessage('u1')),
diff --git a/packages/protocol/src/events.ts b/packages/protocol/src/events.ts
index d0e22e0d6..190eeca8e 100644
--- a/packages/protocol/src/events.ts
+++ b/packages/protocol/src/events.ts
@@ -284,6 +284,15 @@ export interface CompactionResult {
   readonly compactedCount: number;
   readonly tokensBefore: number;
   readonly tokensAfter: number;
+  /**
+   * Number of real user messages kept verbatim ahead of the summary in the
+   * post-compaction live context. Recorded so the wire-transcript reducer can
+   * reproduce the live folded length without re-deriving it from the full
+   * transcript (which still holds the untruncated originals of messages the
+   * live context may have truncated, so the two would otherwise diverge).
+   * Optional for backward compatibility with older wire records.
+   */
+  readonly keptUserMessageCount?: number;
 }
 
 export interface ToolUpdate {
@@ -944,6 +953,7 @@ export const compactionResultSchema = z.object({
   compactedCount: z.number(),
   tokensBefore: z.number(),
   tokensAfter: z.number(),
+  keptUserMessageCount: z.number().optional(),
 }) satisfies z.ZodType<CompactionResult>;
 
 export const toolUpdateSchema = z.object({

From 81968856bdfeb7967639fc8bd053dc117327d144 Mon Sep 17 00:00:00 2001
From: 7Sageer <7sageer@djwcb.cn>
Date: Mon, 29 Jun 2026 17:44:15 +0800
Subject: [PATCH 08/16] fix(agent-core): preserve compaction bookkeeping across
 resume

applyCompaction now preserves the persisted tokensAfter and
keptUserMessageCount when replaying a compaction record during resume,
so restored bookkeeping matches the wire record instead of being
re-derived from replayed history. Re-deriving can drift when token
estimation changes, and it breaks replay projections that assert the
recorded values. Live compaction still derives both values from the
current history.

Update the affected compaction, resume, and replay-range tests.
---
 .changeset/compaction-internals-cleanup.md     |  2 +-
 .../agent-core/src/agent/compaction/types.ts   | 10 ++++++++--
 packages/agent-core/src/agent/context/index.ts |  7 ++++++-
 .../test/agent/compaction/full.test.ts         | 18 +++++++++---------
 .../test/agent/records/index.test.ts           |  1 +
 packages/agent-core/test/agent/resume.test.ts  |  1 +
 6 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/.changeset/compaction-internals-cleanup.md b/.changeset/compaction-internals-cleanup.md
index 7558ffd7b..6d5bc7280 100644
--- a/.changeset/compaction-internals-cleanup.md
+++ b/.changeset/compaction-internals-cleanup.md
@@ -2,4 +2,4 @@
 "@moonshot-ai/kimi-code": patch
 ---
 
-Tighten compaction bookkeeping so compacted history stays consistent across retries.
+Tighten compaction bookkeeping so compacted history stays consistent across retries and resume.
diff --git a/packages/agent-core/src/agent/compaction/types.ts b/packages/agent-core/src/agent/compaction/types.ts
index 511782b7b..92b55ad0e 100644
--- a/packages/agent-core/src/agent/compaction/types.ts
+++ b/packages/agent-core/src/agent/compaction/types.ts
@@ -14,8 +14,14 @@ export interface CompactionResult {
   keptUserMessageCount?: number;
 }
 
-/** Inputs `ContextMemory.applyCompaction` needs to derive a `CompactionResult`. */
-export type CompactionInput = Pick<CompactionResult, 'summary' | 'compactedCount' | 'tokensBefore'>;
+/**
+ * Inputs `ContextMemory.applyCompaction` needs to derive a `CompactionResult`.
+ * `tokensAfter` / `keptUserMessageCount` are optional: the live path omits them
+ * (they are derived from the current history), while restore passes the
+ * persisted record so its historical values are preserved verbatim.
+ */
+export type CompactionInput = Pick<CompactionResult, 'summary' | 'compactedCount' | 'tokensBefore'> &
+  Partial<Pick<CompactionResult, 'tokensAfter' | 'keptUserMessageCount'>>;
 
 export type CompactionSource = 'manual' | 'auto';
 
diff --git a/packages/agent-core/src/agent/context/index.ts b/packages/agent-core/src/agent/context/index.ts
index e94f6d492..1f1693077 100644
--- a/packages/agent-core/src/agent/context/index.ts
+++ b/packages/agent-core/src/agent/context/index.ts
@@ -223,14 +223,19 @@ export class ContextMemory {
       collectCompactableUserMessages(this._history),
       COMPACT_USER_MESSAGE_MAX_TOKENS,
     );
+    // Live compaction omits these so they are derived from the actual
+    // `_history`; restore passes the persisted record so its historical values
+    // are preserved verbatim.
     const tokensAfter =
+      input.tokensAfter ??
       estimateTokens(input.summary) + estimateTokensForMessages(keptUserMessages);
+    const keptUserMessageCount = input.keptUserMessageCount ?? keptUserMessages.length;
     const result: CompactionResult = {
       summary: input.summary,
       compactedCount: input.compactedCount,
       tokensBefore: input.tokensBefore,
       tokensAfter,
-      keptUserMessageCount: keptUserMessages.length,
+      keptUserMessageCount,
     };
     this.agent.records.logRecord({
       type: 'context.apply_compaction',
diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts
index d86d8b230..590394692 100644
--- a/packages/agent-core/test/agent/compaction/full.test.ts
+++ b/packages/agent-core/test/agent/compaction/full.test.ts
@@ -78,10 +78,10 @@ describe('FullCompaction', () => {
       [emit] compaction.started         { "trigger": "manual", "instruction": "Keep the important test facts." }
       [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 184, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
       [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 120, "maxContextTokens": 256000, "contextUsage": 0.00046875, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 184, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 184, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction   { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 119, "time": "<time>" }
+      [wire] context.apply_compaction   { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 119, "keptUserMessageCount": 3, "time": "<time>" }
       [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 119, "maxContextTokens": 256000, "contextUsage": 0.00046484375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 184, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 184, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete   { "time": "<time>" }
-      [emit] compaction.completed       { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 119 } }
+      [emit] compaction.completed       { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 119, "keptUserMessageCount": 3 } }
     `);
     expect(ctx.lastLlmInput()).toMatchInlineSnapshot(`
       system: <system-prompt>
@@ -898,10 +898,10 @@ describe('FullCompaction', () => {
       [wire] context.append_message     { "message": { "role": "user", "content": [ { "type": "text", "text": "new user while compacting" } ], "toolCalls": [], "origin": { "kind": "user" } }, "time": "<time>" }
       [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 162, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
       [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 80, "maxContextTokens": 256000, "contextUsage": 0.0003125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 162, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 162, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction   { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 114, "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 114, "maxContextTokens": 256000, "contextUsage": 0.0004453125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 162, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 162, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] context.apply_compaction   { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 122, "keptUserMessageCount": 3, "time": "<time>" }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 122, "maxContextTokens": 256000, "contextUsage": 0.0004765625, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 162, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 162, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete   { "time": "<time>" }
-      [emit] compaction.completed       { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 114 } }
+      [emit] compaction.completed       { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 122, "keptUserMessageCount": 3 } }
     `);
     expect(ctx.lastLlmInput()).toMatchInlineSnapshot(`
       system: <system-prompt>
@@ -1006,10 +1006,10 @@ describe('FullCompaction', () => {
       [emit] compaction.blocked          { "turnId": 0 }
       [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 183, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
       [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 950000, "maxContextTokens": 256000, "contextUsage": 3.7109375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 183, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 183, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction    { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nAuto compacted summary.", "compactedCount": 7, "tokensBefore": 46, "tokensAfter": 127, "time": "<time>" }
+      [wire] context.apply_compaction    { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nAuto compacted summary.", "compactedCount": 7, "tokensBefore": 46, "tokensAfter": 127, "keptUserMessageCount": 4, "time": "<time>" }
       [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 127, "maxContextTokens": 256000, "contextUsage": 0.00049609375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 183, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 183, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete    { "time": "<time>" }
-      [emit] compaction.completed        { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nAuto compacted summary.", "compactedCount": 7, "tokensBefore": 46, "tokensAfter": 127 } }
+      [emit] compaction.completed        { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nAuto compacted summary.", "compactedCount": 7, "tokensBefore": 46, "tokensAfter": 127, "keptUserMessageCount": 4 } }
       [wire] context.append_loop_event   { "event": { "type": "step.begin", "uuid": "<uuid-1>", "turnId": "0", "step": 1 }, "time": "<time>" }
       [emit] turn.step.started           { "turnId": 0, "step": 1, "stepId": "<uuid-1>" }
       [emit] assistant.delta             { "turnId": 0, "delta": "I can answer after compaction." }
@@ -1955,10 +1955,10 @@ describe('FullCompaction', () => {
       [emit] compaction.blocked          { "turnId": 0 }
       [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 145, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
       [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 0, "maxContextTokens": 1000000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 145, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 145, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction    { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nFirst compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 114, "time": "<time>" }
+      [wire] context.apply_compaction    { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nFirst compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 114, "keptUserMessageCount": 1, "time": "<time>" }
       [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 114, "maxContextTokens": 1000000, "contextUsage": 0.000114, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 145, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 145, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete    { "time": "<time>" }
-      [emit] compaction.completed        { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nFirst compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 114 } }
+      [emit] compaction.completed        { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nFirst compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 114, "keptUserMessageCount": 1 } }
       [wire] context.append_loop_event   { "event": { "type": "step.begin", "uuid": "<uuid-1>", "turnId": "0", "step": 1 }, "time": "<time>" }
       [emit] turn.step.started           { "turnId": 0, "step": 1, "stepId": "<uuid-1>" }
       [emit] assistant.delta             { "turnId": 0, "delta": "I need a tool." }
diff --git a/packages/agent-core/test/agent/records/index.test.ts b/packages/agent-core/test/agent/records/index.test.ts
index 56ed53a1b..ce5c4edad 100644
--- a/packages/agent-core/test/agent/records/index.test.ts
+++ b/packages/agent-core/test/agent/records/index.test.ts
@@ -428,6 +428,7 @@ describe('agent replay range build', () => {
           compactedCount: 0,
           tokensBefore: 10,
           tokensAfter: 3,
+          keptUserMessageCount: 0,
         },
       }),
     ]);
diff --git a/packages/agent-core/test/agent/resume.test.ts b/packages/agent-core/test/agent/resume.test.ts
index 087f7012b..7430d5763 100644
--- a/packages/agent-core/test/agent/resume.test.ts
+++ b/packages/agent-core/test/agent/resume.test.ts
@@ -380,6 +380,7 @@ describe('Agent resume', () => {
           compactedCount: 1,
           tokensBefore: 120,
           tokensAfter: 24,
+          keptUserMessageCount: 1,
         },
         instruction: 'preserve implementation notes',
       }),

From c14d30da1dd0157b7d644cefdf58ba36a973127e Mon Sep 17 00:00:00 2001
From: 7Sageer <7sageer@djwcb.cn>
Date: Mon, 29 Jun 2026 17:58:42 +0800
Subject: [PATCH 09/16] refactor(agent-core): dedupe real-user-input check and
 tighten memento types

- Reuse the shared isRealUserInput helper in ContextMemory.undo and SessionService.canUndoHistory instead of two local copies.

- Sync the wire-transcript header comment with the new post-compaction shape ([...keptUserMessages, compaction_summary]).

- Tighten memento.ts types by using kosong ContentPart and widening estimateTokensForMessage to a structural subset, dropping the `as never` cast.
---
 .../src/agent/compaction/memento.ts           | 26 ++++++++++++-------
 .../agent-core/src/agent/context/index.ts     | 13 ++--------
 .../src/services/message/transcript.ts        | 13 +++++++---
 .../src/services/session/sessionService.ts    | 10 ++-----
 packages/agent-core/src/utils/tokens.ts       | 17 ++++++++++--
 5 files changed, 45 insertions(+), 34 deletions(-)

diff --git a/packages/agent-core/src/agent/compaction/memento.ts b/packages/agent-core/src/agent/compaction/memento.ts
index 705278ea3..1f9278ca2 100644
--- a/packages/agent-core/src/agent/compaction/memento.ts
+++ b/packages/agent-core/src/agent/compaction/memento.ts
@@ -1,3 +1,4 @@
+import type { ContentPart } from '@moonshot-ai/kosong';
 import { estimateTokensForMessage } from '../../utils/tokens';
 import summaryPrefixTemplate from './compaction-summary-prefix.md?raw';
 
@@ -14,21 +15,23 @@ import summaryPrefixTemplate from './compaction-summary-prefix.md?raw';
 export const COMPACTION_SUMMARY_PREFIX = summaryPrefixTemplate.trimEnd();
 export const COMPACT_USER_MESSAGE_MAX_TOKENS = 20_000;
 
-interface ContentPartLike {
-  readonly type: string;
-  readonly text?: string;
-}
-
+/**
+ * Structural subset of kosong's `Message` that the memento helpers inspect.
+ * Both `ContextMessage` (the live context) and the wire-transcript reducer's
+ * mutable message satisfy this shape, so one set of helpers serves both
+ * layers without introducing a shared nominal type. `origin` is what tells
+ * real user input apart from injections and compaction summaries.
+ */
 interface MessageLike {
   readonly role: string;
-  readonly content: readonly ContentPartLike[];
+  readonly content: readonly ContentPart[];
   readonly origin?: { readonly kind: string; readonly trigger?: string } | undefined;
 }
 
-function extractText(content: readonly ContentPartLike[]): string {
+function extractText(content: readonly ContentPart[]): string {
   let text = '';
   for (const part of content) {
-    if (part.type === 'text' && typeof part.text === 'string') {
+    if (part.type === 'text') {
       text += part.text;
     }
   }
@@ -85,6 +88,11 @@ function truncateTextToTokens(text: string, maxTokens: number): string {
 
 function truncateUserMessage<T extends MessageLike>(message: T, maxTokens: number): T {
   const text = truncateTextToTokens(extractText(message.content), maxTokens);
+  // Spread the original message to preserve every field (notably `origin`),
+  // then replace the content with the truncated text and drop any tool calls.
+  // Real user input never carries tool calls, so clearing them is safe. The
+  // cast back to `T` is unavoidable here: TypeScript cannot prove that a
+  // spread-then-override shape still equals the generic `T`.
   return {
     ...message,
     content: [{ type: 'text', text }],
@@ -105,7 +113,7 @@ export function selectRecentUserMessages<T extends MessageLike>(
   let remaining = maxTokens;
   for (let i = messages.length - 1; i >= 0 && remaining > 0; i--) {
     const message = messages[i]!;
-    const tokens = estimateTokensForMessage(message as never);
+    const tokens = estimateTokensForMessage(message);
     if (tokens <= remaining) {
       selected.push(message);
       remaining -= tokens;
diff --git a/packages/agent-core/src/agent/context/index.ts b/packages/agent-core/src/agent/context/index.ts
index 1f1693077..1e2f9b4b9 100644
--- a/packages/agent-core/src/agent/context/index.ts
+++ b/packages/agent-core/src/agent/context/index.ts
@@ -8,6 +8,7 @@ import { escapeXml } from '../../utils/xml-escape';
 import {
   COMPACT_USER_MESSAGE_MAX_TOKENS,
   collectCompactableUserMessages,
+  isRealUserInput,
   selectRecentUserMessages,
   type CompactionInput,
   type CompactionResult,
@@ -178,7 +179,7 @@ export class ContextMemory {
         this._tokenCount -= estimateTokensForMessages([message]);
       }
 
-      if (isRealUserPrompt(message)) {
+      if (isRealUserInput(message)) {
         removedUserCount++;
         if (removedUserCount >= count) break;
       }
@@ -497,16 +498,6 @@ function isEmptyOutputText(output: string): boolean {
   return output.length === 0 || output.trim() === TOOL_OUTPUT_EMPTY_TEXT;
 }
 
-function isRealUserPrompt(message: ContextMessage): boolean {
-  if (message.role !== 'user') return false;
-  const origin = message.origin;
-  if (origin === undefined || origin.kind === 'user') return true;
-  if (origin.kind === 'skill_activation') {
-    return origin.trigger === 'user-slash';
-  }
-  return false;
-}
-
 function formatUndoUnavailableMessage(
   requestedCount: number,
   undoableCount: number,
diff --git a/packages/agent-core/src/services/message/transcript.ts b/packages/agent-core/src/services/message/transcript.ts
index b350c7efb..a5ecfb7f1 100644
--- a/packages/agent-core/src/services/message/transcript.ts
+++ b/packages/agent-core/src/services/message/transcript.ts
@@ -3,8 +3,10 @@
  * agent from its `wire.jsonl` record log.
  *
  * Why: `ContextMemory.applyCompaction` rewrites the in-memory history as
- * `[compaction_summary, ...tail]`, so `getContext().history` only reflects the
- * model's CURRENT context. The wire log, however, keeps every record. The TUI
+ * `[...keptUserMessages, compaction_summary]` (the most recent real user
+ * prompts, verbatim within a token budget, followed by a single user-role
+ * summary), so `getContext().history` only reflects the model's CURRENT
+ * context. The wire log, however, keeps every record. The TUI
  * shows the full transcript on resume because `ReplayBuilder` captures every
  * `pushHistory` during record replay and is never folded by compaction. This
  * module reproduces that exact view for daemon REST consumers (web), without
@@ -19,8 +21,11 @@
  *                                     open assistant message; tool.result appends a
  *                                     tool message with the same `<system>` status
  *                                     wrapping as `toolResultOutputForModel`
- *   - `context.apply_compaction`    → keep the prefix, insert the summary message
- *                                     at the fold point (origin `compaction_summary`)
+ *   - `context.apply_compaction`    → keep the full history, append the
+ *                                     user-role summary marker (origin
+ *                                     `compaction_summary`), and recover
+ *                                     `foldedLength` from the recorded
+ *                                     `keptUserMessageCount`
  *   - `context.undo`                → remove tail messages exactly like
  *                                     `ContextMemory.undo` (skip injections, stop at
  *                                     compaction summaries / `context.clear` floors)
diff --git a/packages/agent-core/src/services/session/sessionService.ts b/packages/agent-core/src/services/session/sessionService.ts
index 3b684a0dc..18e0370b4 100644
--- a/packages/agent-core/src/services/session/sessionService.ts
+++ b/packages/agent-core/src/services/session/sessionService.ts
@@ -1,6 +1,7 @@
 import { Disposable, IInstantiationService, InstantiationType, registerSingleton } from '../../di';
 import { Emitter } from '../../base/common/event';
 import { ErrorCodes, KimiError } from '../../errors';
+import { isRealUserInput } from '../../agent/compaction';
 import type { AgentContextData, ContextMessage } from '../../agent/context';
 import type { JsonObject, ListSessionsPayload, SessionSummary } from '../../rpc';
 import type { SessionMeta } from '../../session';
@@ -59,7 +60,7 @@ function canUndoHistory(history: readonly ContextMessage[], count: number): bool
     if (message === undefined) continue;
     if (message.origin?.kind === 'injection') continue;
     if (message.origin?.kind === 'compaction_summary') return false;
-    if (isRealUserPrompt(message)) {
+    if (isRealUserInput(message)) {
       found++;
       if (found >= count) return true;
     }
@@ -67,13 +68,6 @@ function canUndoHistory(history: readonly ContextMessage[], count: number): bool
   return false;
 }
 
-function isRealUserPrompt(message: ContextMessage): boolean {
-  if (message.role !== 'user') return false;
-  const origin = message.origin;
-  if (origin === undefined || origin.kind === 'user') return true;
-  return origin.kind === 'skill_activation' && origin.trigger === 'user-slash';
-}
-
 function pageContextMessages(
   sessionId: string,
   sessionCreatedAtMs: number,
diff --git a/packages/agent-core/src/utils/tokens.ts b/packages/agent-core/src/utils/tokens.ts
index fe567f732..af8b70152 100644
--- a/packages/agent-core/src/utils/tokens.ts
+++ b/packages/agent-core/src/utils/tokens.ts
@@ -1,6 +1,19 @@
 import type { ContentPart, Message, Tool } from '@moonshot-ai/kosong';
 
-const messageTokenEstimateCache = new WeakMap<Message, number>();
+/**
+ * Structural subset of kosong's {@link Message} that token estimation reads.
+ * Accepting the subset (instead of the full `Message`) lets callers with
+ * message-shaped objects — such as those handled by the compaction helpers in
+ * `memento.ts`, which carry only `role`/`content`/`origin` — estimate tokens
+ * without an unsafe cast, while full `Message` values still satisfy it.
+ */
+interface TokenEstimatableMessage {
+  readonly role: string;
+  readonly content: readonly ContentPart[];
+  readonly toolCalls?: readonly { readonly name: string; readonly arguments: unknown }[];
+}
+
+const messageTokenEstimateCache = new WeakMap<TokenEstimatableMessage, number>();
 
 /**
  * Estimate token count from text using a character-based heuristic.
@@ -41,7 +54,7 @@ export function estimateTokensForTools(tools: readonly Tool[]): number {
   return total;
 }
 
-export function estimateTokensForMessage(message: Message): number {
+export function estimateTokensForMessage(message: TokenEstimatableMessage): number {
   const cached = messageTokenEstimateCache.get(message);
   if (cached !== undefined) {
     return cached;

From 15c4e0541ea156b4ff5f6cd2815de04b5cd035ac Mon Sep 17 00:00:00 2001
From: 7Sageer <7sageer@djwcb.cn>
Date: Mon, 29 Jun 2026 19:26:48 +0800
Subject: [PATCH 10/16] refactor(agent-core): centralize compaction retention
 policy

Make the keep/drop decision for user-role messages explicit in the compaction memento helpers and cover every PromptOrigin kind. Keep Codex-style semantics: only real user prompts and user-slash skill activations survive compaction; other user-role messages are either re-injected or ephemeral. Add parity coverage across live context, transcript, and vis projector tests.
---
 .../server/test/lib/context-projector.test.ts |  38 ++++++
 .../src/agent/compaction/memento.ts           |  53 ++++++--
 packages/agent-core/src/agent/turn/index.ts   |   7 +-
 .../test/agent/compaction/full.test.ts        |  79 +++++++++++-
 .../test/agent/compaction/memento.test.ts     | 115 ++++++++++++++++++
 .../agent-core/test/agent/context.test.ts     |  31 +++++
 .../test/services/message-transcript.test.ts  |  37 ++++++
 7 files changed, 345 insertions(+), 15 deletions(-)

diff --git a/apps/vis/server/test/lib/context-projector.test.ts b/apps/vis/server/test/lib/context-projector.test.ts
index a4be1196e..176c53a8b 100644
--- a/apps/vis/server/test/lib/context-projector.test.ts
+++ b/apps/vis/server/test/lib/context-projector.test.ts
@@ -299,6 +299,44 @@ describe('context-projector', () => {
     expect(proj.messages[2]!.message.content[0]).toMatchObject({ text: 'sum' });
   });
 
+  it('apply_compaction drops shell/local-command/background messages in model mode only', () => {
+    const entries = [
+      { lineNo: 1, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'real user' }], toolCalls: [], origin: { kind: 'user' as const } } }, raw: {} },
+      { lineNo: 2, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: '! pwd' }], toolCalls: [], origin: { kind: 'shell_command' as const, phase: 'input' as const } } }, raw: {} },
+      { lineNo: 3, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'local output' }], toolCalls: [], origin: { kind: 'injection' as const, variant: 'local-command-stdout' } } }, raw: {} },
+      { lineNo: 4, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'background done' }], toolCalls: [], origin: { kind: 'background_task' as const, taskId: 'task', status: 'completed' as const, notificationId: 'notification' } } }, raw: {} },
+      { lineNo: 5, data: { type: 'context.append_message' as const,
+          message: { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'assistant reply' }], toolCalls: [] } }, raw: {} },
+      { lineNo: 6, data: { type: 'context.apply_compaction' as const,
+          summary: 'sum', compactedCount: 5, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
+      { lineNo: 7, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'new' }], toolCalls: [], origin: { kind: 'user' as const } } }, raw: {} },
+    ];
+
+    const model = projectContext(entries as any);
+    expect(model.messages.map((m) => m.source)).toEqual([
+      'append_message', 'compaction_summary', 'append_message',
+    ]);
+    expect(model.messages.map((m) => m.message.content[0])).toMatchObject([
+      { text: 'real user' }, { text: 'sum' }, { text: 'new' },
+    ]);
+
+    const full = projectContext(entries as any, 'full');
+    expect(full.messages.map((m) => m.source)).toEqual([
+      'append_message', 'append_message', 'append_message', 'append_message',
+      'append_message', 'compaction_summary', 'append_message',
+    ]);
+    expect(full.messages.map((m) => m.message.content[0])).toMatchObject([
+      { text: 'real user' }, { text: '! pwd' }, { text: 'local output' },
+      { text: 'background done' }, { text: 'assistant reply' }, { text: 'sum' },
+      { text: 'new' },
+    ]);
+  });
+
   // ---- Fix ④: UI-only markers must not offset agent-core history indices ------
   // agent-core computes compactedCount (and the micro-compaction cutoff) as
   // indices into _history, which NEVER contains the synthetic 'undo'/'clear'
diff --git a/packages/agent-core/src/agent/compaction/memento.ts b/packages/agent-core/src/agent/compaction/memento.ts
index 1f9278ca2..db6329169 100644
--- a/packages/agent-core/src/agent/compaction/memento.ts
+++ b/packages/agent-core/src/agent/compaction/memento.ts
@@ -1,5 +1,6 @@
 import type { ContentPart } from '@moonshot-ai/kosong';
 import { estimateTokensForMessage } from '../../utils/tokens';
+import type { PromptOrigin } from '../context/types';
 import summaryPrefixTemplate from './compaction-summary-prefix.md?raw';
 
 /**
@@ -25,7 +26,44 @@ export const COMPACT_USER_MESSAGE_MAX_TOKENS = 20_000;
 interface MessageLike {
   readonly role: string;
   readonly content: readonly ContentPart[];
-  readonly origin?: { readonly kind: string; readonly trigger?: string } | undefined;
+  readonly origin?: PromptOrigin | undefined;
+}
+
+export type CompactionUserDisposition = 'keep' | 'drop';
+
+/**
+ * Single source of truth for whether a user-role message survives compaction as
+ * genuine user input. Codex-style semantics: only real user prompts and
+ * user-slash skill activations are kept verbatim. Any other user-role message
+ * is either rebuilt by injectors after compaction or intentionally ephemeral,
+ * so it is dropped from the live context even when transcript/replay retains
+ * it for UI rendering. New `PromptOrigin` kinds must update this switch.
+ */
+export function compactionUserMessageDisposition(
+  origin: PromptOrigin | undefined,
+): CompactionUserDisposition {
+  if (origin === undefined) return 'keep';
+  switch (origin.kind) {
+    case 'user':
+      return 'keep';
+    case 'skill_activation':
+      return origin.trigger === 'user-slash' ? 'keep' : 'drop';
+    case 'injection':
+    case 'shell_command':
+    case 'compaction_summary':
+    case 'system_trigger':
+    case 'background_task':
+    case 'cron_job':
+    case 'cron_missed':
+    case 'hook_result':
+    case 'retry':
+      return 'drop';
+    default: {
+      const _exhaustive: never = origin;
+      void _exhaustive;
+      return 'drop';
+    }
+  }
 }
 
 function extractText(content: readonly ContentPart[]): string {
@@ -45,18 +83,11 @@ export function isCompactionSummaryMessage(message: MessageLike): boolean {
 
 /**
  * Keep only genuine user input (real user prompts and user-slash skill
- * activations). Injections (system reminders, plan-mode reminders),
- * background-task notifications, system triggers, cron/hook/retry messages,
- * and previous compaction summaries are excluded — they are either
- * re-injected each turn or ephemeral, since initial context is rebuilt
- * every turn.
+ * activations). See `compactionUserMessageDisposition` for the full keep/drop
+ * policy and the rationale for each origin.
  */
 export function isRealUserInput(message: MessageLike): boolean {
-  if (message.role !== 'user') return false;
-  const origin = message.origin;
-  if (origin === undefined || origin.kind === 'user') return true;
-  if (origin.kind === 'skill_activation') return origin.trigger === 'user-slash';
-  return false;
+  return message.role === 'user' && compactionUserMessageDisposition(message.origin) === 'keep';
 }
 
 export function collectCompactableUserMessages<T extends MessageLike>(messages: readonly T[]): T[] {
diff --git a/packages/agent-core/src/agent/turn/index.ts b/packages/agent-core/src/agent/turn/index.ts
index 9803b03b9..d2fb3c8c5 100644
--- a/packages/agent-core/src/agent/turn/index.ts
+++ b/packages/agent-core/src/agent/turn/index.ts
@@ -666,9 +666,10 @@ export class TurnFlow {
               await this.agent.fullCompaction.beforeStep(stepSignal);
               // Flush steered messages (background-task / cron notifications,
               // user interrupts) AFTER compaction so they land in the
-              // post-compaction context instead of being dropped by it:
-              // compaction keeps only genuine user prompts and discards these
-              // origins, and they are not re-injected later.
+              // post-compaction context instead of being dropped by it. The
+              // keep/drop decision lives in
+              // `compactionUserMessageDisposition()`; these origins are not
+              // re-injected later, so append them only after compaction runs.
               this.flushSteerBuffer();
               await this.agent.injection.inject();
               deduper.beginStep();
diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts
index ba1b21bcf..3eceebb5b 100644
--- a/packages/agent-core/test/agent/compaction/full.test.ts
+++ b/packages/agent-core/test/agent/compaction/full.test.ts
@@ -25,7 +25,7 @@ import {
 } from '../../../src/agent/compaction';
 import { FLAG_DEFINITIONS, MASTER_ENV } from '../../../src/flags';
 import { HookEngine, type HookEngineTriggerArgs } from '../../../src/session/hooks';
-import { estimateTokensForMessages } from '../../../src/utils/tokens';
+import { estimateTokens, estimateTokensForMessages } from '../../../src/utils/tokens';
 import { recordingTelemetry, type TelemetryRecord } from '../../fixtures/telemetry';
 import type { TestAgentContext, TestAgentOptions } from '../harness/agent';
 import { testAgent } from '../harness/agent';
@@ -133,6 +133,83 @@ describe('FullCompaction', () => {
     await ctx.expectResumeMatches();
   });
 
+  it('keeps only real user input and re-injects permission reminders after compaction', async () => {
+    const ctx = testAgent();
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.appendExchange(1, 'real user one', 'assistant one', 20);
+    ctx.agent.context.appendBashInput('pwd');
+    ctx.agent.context.appendBashOutput('/tmp/repo', '', false);
+    ctx.agent.context.appendLocalCommandStdout('local command output');
+    ctx.agent.context.appendSystemReminder('stale reminder', {
+      kind: 'injection',
+      variant: 'system_reminder',
+    });
+    ctx.agent.context.appendUserMessage([{ type: 'text', text: 'background task done' }], {
+      kind: 'background_task',
+      taskId: 'task-1',
+      status: 'completed',
+      notificationId: 'notification-1',
+    });
+    ctx.agent.context.appendUserMessage([{ type: 'text', text: 'real user two' }]);
+    ctx.agent.permission.setMode('auto');
+
+    const permissionReminder = new Promise<void>((resolve) => {
+      const handler = (entry: unknown) => {
+        const record = entry as {
+          event?: string;
+          args?: { message?: { origin?: { kind?: string; variant?: string } } };
+        };
+        const origin = record.args?.message?.origin;
+        if (
+          record.event === 'context.append_message' &&
+          origin?.kind === 'injection' &&
+          origin.variant === 'permission_mode'
+        ) {
+          ctx.emitter.off('context.append_message', handler);
+          resolve();
+        }
+      };
+      ctx.emitter.on('context.append_message', handler);
+    });
+
+    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' });
+    await ctx.rpc.beginCompaction({});
+    await ctx.once('compaction.completed');
+    await permissionReminder;
+
+    expect(ctx.agent.context.history.map((message) => message.origin?.kind ?? 'user')).toEqual([
+      'user',
+      'user',
+      'compaction_summary',
+      'injection',
+    ]);
+    expect(
+      ctx.agent.context.history.map((message) =>
+        message.origin?.kind === 'injection' ? message.origin.variant : undefined,
+      ),
+    ).toEqual([undefined, undefined, undefined, 'permission_mode']);
+
+    const applyCompaction = [...ctx.allEvents]
+      .toReversed()
+      .find((entry) => entry.type === '[wire]' && entry.event === 'context.apply_compaction');
+    expect(applyCompaction).toBeDefined();
+    const record = applyCompaction?.args as {
+      keptUserMessageCount?: number;
+      tokensAfter?: number;
+      summary?: string;
+    };
+    expect(record.keptUserMessageCount).toBe(2);
+    const expectedSummary = `${COMPACTION_SUMMARY_PREFIX}\nCompacted summary.`;
+    expect(record.summary).toBe(expectedSummary);
+    expect(record.tokensAfter).toBe(
+      estimateTokens(expectedSummary) +
+        estimateTokensForMessages(ctx.agent.context.history.slice(0, 2)),
+    );
+  });
+
   it('refreshes the system prompt after compaction completes', async () => {
     const ctx = testAgent();
     ctx.configure({
diff --git a/packages/agent-core/test/agent/compaction/memento.test.ts b/packages/agent-core/test/agent/compaction/memento.test.ts
index 9ccd9d49c..61b703eb3 100644
--- a/packages/agent-core/test/agent/compaction/memento.test.ts
+++ b/packages/agent-core/test/agent/compaction/memento.test.ts
@@ -5,9 +5,13 @@ import {
   COMPACTION_SUMMARY_PREFIX,
   buildCompactionSummaryText,
   collectCompactableUserMessages,
+  compactionUserMessageDisposition,
   isCompactionSummaryMessage,
+  isRealUserInput,
   selectRecentUserMessages,
+  type CompactionUserDisposition,
 } from '../../../src/agent/compaction';
+import type { PromptOrigin } from '../../../src/agent/context/types';
 import { estimateTokens, estimateTokensForMessage } from '../../../src/utils/tokens';
 
 function textMessage(role: 'user' | 'assistant' | 'tool', text: string): Message {
@@ -18,6 +22,78 @@ function messageText(message: Message): string {
   return message.content.map((part) => (part.type === 'text' ? part.text : '')).join('');
 }
 
+const ALL_PROMPT_ORIGIN_KINDS = {
+  user: true,
+  skill_activation: true,
+  injection: true,
+  shell_command: true,
+  compaction_summary: true,
+  system_trigger: true,
+  background_task: true,
+  cron_job: true,
+  cron_missed: true,
+  hook_result: true,
+  retry: true,
+} satisfies Record<PromptOrigin['kind'], true>;
+
+const EXPECTED_DISPOSITION: Record<PromptOrigin['kind'], CompactionUserDisposition> = {
+  user: 'keep',
+  skill_activation: 'keep',
+  injection: 'drop',
+  shell_command: 'drop',
+  compaction_summary: 'drop',
+  system_trigger: 'drop',
+  background_task: 'drop',
+  cron_job: 'drop',
+  cron_missed: 'drop',
+  hook_result: 'drop',
+  retry: 'drop',
+};
+
+function originForKind(kind: PromptOrigin['kind']): PromptOrigin {
+  switch (kind) {
+    case 'user':
+      return { kind: 'user' };
+    case 'skill_activation':
+      return {
+        kind: 'skill_activation',
+        activationId: 'activation',
+        skillName: 'skill',
+        trigger: 'user-slash',
+      };
+    case 'injection':
+      return { kind: 'injection', variant: 'system_reminder' };
+    case 'shell_command':
+      return { kind: 'shell_command', phase: 'input' };
+    case 'compaction_summary':
+      return { kind: 'compaction_summary' };
+    case 'system_trigger':
+      return { kind: 'system_trigger', name: 'system' };
+    case 'background_task':
+      return {
+        kind: 'background_task',
+        taskId: 'task',
+        status: 'completed',
+        notificationId: 'notification',
+      };
+    case 'cron_job':
+      return {
+        kind: 'cron_job',
+        jobId: 'job',
+        cron: '* * * * *',
+        recurring: true,
+        coalescedCount: 1,
+        stale: false,
+      };
+    case 'cron_missed':
+      return { kind: 'cron_missed', count: 1 };
+    case 'hook_result':
+      return { kind: 'hook_result', event: 'PreCompact' };
+    case 'retry':
+      return { kind: 'retry', trigger: 'system' };
+  }
+}
+
 describe('isCompactionSummaryMessage', () => {
   it('detects the compaction origin', () => {
     const message = {
@@ -36,6 +112,45 @@ describe('isCompactionSummaryMessage', () => {
   });
 });
 
+describe('compactionUserMessageDisposition', () => {
+  it('classifies every prompt origin kind', () => {
+    for (const kind of Object.keys(ALL_PROMPT_ORIGIN_KINDS) as Array<PromptOrigin['kind']>) {
+      expect(compactionUserMessageDisposition(originForKind(kind))).toBe(EXPECTED_DISPOSITION[kind]);
+    }
+  });
+
+  it('drops model-triggered skill activations', () => {
+    expect(
+      compactionUserMessageDisposition({
+        kind: 'skill_activation',
+        activationId: 'activation',
+        skillName: 'skill',
+        trigger: 'model-tool',
+      }),
+    ).toBe('drop');
+  });
+});
+
+describe('isRealUserInput', () => {
+  it('keeps genuine user input and drops other origins', () => {
+    expect(isRealUserInput({ ...textMessage('user', 'hello'), origin: originForKind('user') })).toBe(
+      true,
+    );
+    expect(
+      isRealUserInput({ ...textMessage('user', 'hello'), origin: originForKind('skill_activation') }),
+    ).toBe(true);
+    expect(
+      isRealUserInput({ ...textMessage('user', 'hello'), origin: originForKind('injection') }),
+    ).toBe(false);
+    expect(
+      isRealUserInput({ ...textMessage('user', 'hello'), origin: originForKind('shell_command') }),
+    ).toBe(false);
+    expect(
+      isRealUserInput({ ...textMessage('user', 'hello'), origin: originForKind('background_task') }),
+    ).toBe(false);
+  });
+});
+
 describe('collectCompactableUserMessages', () => {
   it('keeps only user messages', () => {
     const messages = [
diff --git a/packages/agent-core/test/agent/context.test.ts b/packages/agent-core/test/agent/context.test.ts
index 99ced1192..d67fc0f6b 100644
--- a/packages/agent-core/test/agent/context.test.ts
+++ b/packages/agent-core/test/agent/context.test.ts
@@ -616,6 +616,37 @@ describe('Agent context', () => {
     await ctx.expectResumeMatches();
   });
 
+  it('applyCompaction keeps only real user input from mixed user-role history', () => {
+    const ctx = testAgent();
+    ctx.configure();
+
+    ctx.agent.context.appendUserMessage([{ type: 'text', text: 'real prompt' }]);
+    ctx.agent.context.appendBashInput('pwd');
+    ctx.agent.context.appendBashOutput('/tmp/repo', '', false);
+    ctx.agent.context.appendLocalCommandStdout('local command output');
+    ctx.agent.context.appendSystemReminder('stale reminder', {
+      kind: 'injection',
+      variant: 'host',
+    });
+
+    const result = ctx.agent.context.applyCompaction({
+      summary: 'summary of mixed history',
+      compactedCount: 5,
+      tokensBefore: 100,
+    });
+    ctx.agent.context.appendSystemReminder('fresh reminder', {
+      kind: 'injection',
+      variant: 'host',
+    });
+
+    expect(ctx.agent.context.history.map(({ role, origin }) => ({ role, origin }))).toEqual([
+      { role: 'user', origin: { kind: 'user' } },
+      { role: 'user', origin: { kind: 'compaction_summary' } },
+      { role: 'user', origin: { kind: 'injection', variant: 'host' } },
+    ]);
+    expect(result.keptUserMessageCount).toBe(1);
+  });
+
   it('clears context before the next LLM request', async () => {
     const ctx = testAgent();
     ctx.configure();
diff --git a/packages/agent-core/test/services/message-transcript.test.ts b/packages/agent-core/test/services/message-transcript.test.ts
index b90cd4b31..e848656db 100644
--- a/packages/agent-core/test/services/message-transcript.test.ts
+++ b/packages/agent-core/test/services/message-transcript.test.ts
@@ -115,6 +115,43 @@ describe('reduceWireRecords', () => {
     expect(foldedLength).toBe(4);
   });
 
+  it('keeps shell and local-command output in the transcript but not foldedLength', () => {
+    const { entries, foldedLength } = reduceWireRecords([
+      appendMessage(userMessage('u1')),
+      appendMessage(userMessage('! pwd', { kind: 'shell_command', phase: 'input' })),
+      appendMessage(userMessage('local output', { kind: 'injection', variant: 'local-command-stdout' })),
+      ...assistantStep('s1', 'a1'),
+      {
+        type: 'context.apply_compaction',
+        summary: 'SUM',
+        compactedCount: 4,
+        tokensBefore: 100,
+        tokensAfter: 20,
+        keptUserMessageCount: 1,
+      } as AgentRecord,
+      appendMessage(userMessage('u2')),
+    ]);
+
+    expect(entries.map((e) => textOf(e.message))).toEqual([
+      'u1',
+      '! pwd',
+      'local output',
+      'a1',
+      'SUM',
+      'u2',
+    ]);
+    expect(entries.map((e) => e.message.role)).toEqual([
+      'user',
+      'user',
+      'user',
+      'assistant',
+      'user',
+      'user',
+    ]);
+    // 1 kept real user message + summary + u2 appended after compaction.
+    expect(foldedLength).toBe(3);
+  });
+
   it('handles repeated compactions', () => {
     const { entries, foldedLength } = reduceWireRecords([
       appendMessage(userMessage('u1')),

From 800792ab9c4292901f91f664a0790fcbb4fa4db1 Mon Sep 17 00:00:00 2001
From: 7Sageer <7sageer@djwcb.cn>
Date: Mon, 29 Jun 2026 19:27:57 +0800
Subject: [PATCH 11/16] fix(agent-core): tighten compaction context refresh

---
 .../agent-core/src/agent/compaction/full.ts   | 12 ++++-
 .../agent-core/src/agent/compaction/types.ts  | 18 +++++--
 .../agent-core/src/agent/context/index.ts     |  2 +
 packages/agent-core/src/agent/index.ts        | 42 ++++++++++++-----
 packages/agent-core/src/session/index.ts      | 39 ++++++++++++++-
 .../test/agent/compaction/full.test.ts        | 35 ++++++++++++++
 packages/agent-core/test/session/init.test.ts | 47 +++++++++++++++++++
 packages/kosong/src/providers/anthropic.ts    | 30 ++++--------
 packages/protocol/src/events.ts               |  8 ++++
 9 files changed, 193 insertions(+), 40 deletions(-)

diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts
index 3c26b1c27..f0e1e7395 100644
--- a/packages/agent-core/src/agent/compaction/full.ts
+++ b/packages/agent-core/src/agent/compaction/full.ts
@@ -379,9 +379,13 @@ export class FullCompaction {
       let summary: string | undefined;
       // Compact the whole history, dropping the oldest item on overflow to
       // preserve the prefix-cache-friendly tail. `historyForModel` is the
-      // (possibly trimmed) view sent to the model; the summary is always built
-      // from the untouched `originalHistory`.
+      // (possibly trimmed) view sent to the model. When it is trimmed, the
+      // dropped oldest messages are not covered by the produced summary (a
+      // kept real-user message among them may still be retained verbatim, but
+      // assistant/tool messages are lost); `droppedCount` tracks how many so
+      // records and telemetry can surface the summary's blind spot honestly.
       let historyForModel = originalHistory;
+      let droppedCount = 0;
       while (true) {
         const messages = [
           ...this.agent.context.project(historyForModel),
@@ -419,7 +423,9 @@ export class FullCompaction {
             // Dropping a bare `slice(1)` can strand a tool result at the front,
             // which the provider rejects as a malformed request. Trim any
             // leading tool results along with the oldest message.
+            const before = historyForModel.length;
             historyForModel = dropOldestMessageAndLeadingToolResults(historyForModel);
+            droppedCount += before - historyForModel.length;
             retryCount = 0;
             continue;
           }
@@ -452,6 +458,7 @@ export class FullCompaction {
         summary: summaryText,
         compactedCount: originalHistory.length,
         tokensBefore,
+        droppedCount: droppedCount === 0 ? undefined : droppedCount,
       });
 
       // Telemetry keys are snake_case, but the `context.apply_compaction`
@@ -464,6 +471,7 @@ export class FullCompaction {
         tokens_after: result.tokensAfter,
         duration_ms: Date.now() - startedAt,
         compacted_count: result.compactedCount,
+        dropped_count: result.droppedCount,
         retry_count: retryCount,
         round: 1,
         thinking_level: this.agent.config.thinkingLevel,
diff --git a/packages/agent-core/src/agent/compaction/types.ts b/packages/agent-core/src/agent/compaction/types.ts
index 92b55ad0e..80be2263c 100644
--- a/packages/agent-core/src/agent/compaction/types.ts
+++ b/packages/agent-core/src/agent/compaction/types.ts
@@ -12,16 +12,26 @@ export interface CompactionResult {
    * compatibility with older wire records.
    */
   keptUserMessageCount?: number;
+  /**
+   * Number of oldest messages trimmed from the summarizer input when the
+   * compaction request itself overflowed the model window. These messages are
+   * not covered by the produced summary — a real-user message among them may
+   * still be retained verbatim in the live context via `keptUserMessageCount`,
+   * but assistant/tool messages are lost. Surfacing the count lets records and
+   * telemetry report the summary's blind spot honestly. Optional for backward
+   * compatibility with older wire records.
+   */
+  droppedCount?: number;
 }
 
 /**
  * Inputs `ContextMemory.applyCompaction` needs to derive a `CompactionResult`.
- * `tokensAfter` / `keptUserMessageCount` are optional: the live path omits them
- * (they are derived from the current history), while restore passes the
- * persisted record so its historical values are preserved verbatim.
+ * `tokensAfter` / `keptUserMessageCount` / `droppedCount` are optional: the live
+ * path omits the first two (derived from the current history) and sets the last
+ * only when trimming occurred; restore passes the persisted record verbatim.
  */
 export type CompactionInput = Pick<CompactionResult, 'summary' | 'compactedCount' | 'tokensBefore'> &
-  Partial<Pick<CompactionResult, 'tokensAfter' | 'keptUserMessageCount'>>;
+  Partial<Pick<CompactionResult, 'tokensAfter' | 'keptUserMessageCount' | 'droppedCount'>>;
 
 export type CompactionSource = 'manual' | 'auto';
 
diff --git a/packages/agent-core/src/agent/context/index.ts b/packages/agent-core/src/agent/context/index.ts
index 1e2f9b4b9..8e67fff30 100644
--- a/packages/agent-core/src/agent/context/index.ts
+++ b/packages/agent-core/src/agent/context/index.ts
@@ -237,6 +237,7 @@ export class ContextMemory {
       tokensBefore: input.tokensBefore,
       tokensAfter,
       keptUserMessageCount,
+      droppedCount: input.droppedCount,
     };
     this.agent.records.logRecord({
       type: 'context.apply_compaction',
@@ -249,6 +250,7 @@ export class ContextMemory {
         tokensBefore: result.tokensBefore,
         tokensAfter: result.tokensAfter,
         keptUserMessageCount: result.keptUserMessageCount,
+        droppedCount: result.droppedCount,
       },
     });
     this._history = [
diff --git a/packages/agent-core/src/agent/index.ts b/packages/agent-core/src/agent/index.ts
index 96aa460ec..e17b98c8b 100644
--- a/packages/agent-core/src/agent/index.ts
+++ b/packages/agent-core/src/agent/index.ts
@@ -86,6 +86,7 @@ export interface AgentOptions {
   readonly experimentalFlags?: ExperimentalFlagResolver;
   readonly replay?: ReplayBuilderOptions;
   readonly additionalDirs?: readonly string[];
+  readonly systemPromptContextProvider?: (() => Promise<PreparedSystemPromptContext>) | undefined;
 }
 
 export class Agent {
@@ -133,6 +134,7 @@ export class Agent {
   private additionalDirs: readonly string[];
   private activeProfile?: ResolvedAgentProfile;
   private brandHome?: string;
+  private readonly systemPromptContextProvider?: (() => Promise<PreparedSystemPromptContext>) | undefined;
 
   constructor(options: AgentOptions) {
     this.type = options.type ?? 'main';
@@ -151,6 +153,7 @@ export class Agent {
     this.telemetry = options.telemetry ?? noopTelemetryClient;
     this.experimentalFlags = options.experimentalFlags ?? new FlagResolver();
     this.additionalDirs = normalizeAdditionalDirs(options.additionalDirs ?? []);
+    this.systemPromptContextProvider = options.systemPromptContextProvider;
 
     this.llmRequestLogger = new LlmRequestLogger(this.log);
     this.blobStore = options.homedir
@@ -259,18 +262,14 @@ export class Agent {
     context?: PreparedSystemPromptContext,
     brandHome?: string,
   ): void {
+    this.setActiveProfile(profile, brandHome);
+    this.updateSystemPromptFromProfile(profile, context);
+    this.tools.setActiveTools(profile.tools);
+  }
+
+  setActiveProfile(profile: ResolvedAgentProfile, brandHome?: string): void {
     this.activeProfile = profile;
     this.brandHome = brandHome;
-    const systemPrompt = profile.systemPrompt({
-      osEnv: this.kaos.osEnv,
-      cwd: this.config.cwd,
-      skills: this.skills?.registry,
-      cwdListing: context?.cwdListing,
-      agentsMd: context?.agentsMd,
-      additionalDirsInfo: context?.additionalDirsInfo,
-    });
-    this.config.update({ profileName: profile.name, systemPrompt });
-    this.tools.setActiveTools(profile.tools);
   }
 
   /**
@@ -281,10 +280,27 @@ export class Agent {
    */
   async refreshSystemPrompt(): Promise<void> {
     if (this.activeProfile === undefined) return;
-    const context = await prepareSystemPromptContext(this.kaos, this.brandHome, {
-      additionalDirs: this.additionalDirs,
+    const context = this.systemPromptContextProvider === undefined
+      ? await prepareSystemPromptContext(this.kaos, this.brandHome, {
+          additionalDirs: this.additionalDirs,
+        })
+      : await this.systemPromptContextProvider();
+    this.updateSystemPromptFromProfile(this.activeProfile, context);
+  }
+
+  private updateSystemPromptFromProfile(
+    profile: ResolvedAgentProfile,
+    context?: PreparedSystemPromptContext,
+  ): void {
+    const systemPrompt = profile.systemPrompt({
+      osEnv: this.kaos.osEnv,
+      cwd: this.config.cwd,
+      skills: this.skills?.registry,
+      cwdListing: context?.cwdListing,
+      agentsMd: context?.agentsMd,
+      additionalDirsInfo: context?.additionalDirsInfo,
     });
-    this.useProfile(this.activeProfile, context, this.brandHome);
+    this.config.update({ profileName: profile.name, systemPrompt });
   }
 
   async resume(options?: AgentRecordsReplayOptions): Promise<{ warning?: string }> {
diff --git a/packages/agent-core/src/session/index.ts b/packages/agent-core/src/session/index.ts
index 948c28a49..c49de7f9e 100644
--- a/packages/agent-core/src/session/index.ts
+++ b/packages/agent-core/src/session/index.ts
@@ -718,7 +718,8 @@ export class Session {
   ): Agent {
     const parentAgent = parentAgentId !== null ? this.getReadyAgent(parentAgentId) : undefined;
     const cwd = parentAgent?.config.cwd ?? this.toolKaos.getcwd();
-    return new Agent({
+    let agent!: Agent;
+    agent = new Agent({
       ...config,
       type,
       kaos: this.toolKaos.withCwd(cwd),
@@ -737,7 +738,14 @@ export class Session {
       pluginSessionStarts: type === 'main' ? this.options.pluginSessionStarts : undefined,
       experimentalFlags: this.experimentalFlags,
       additionalDirs: parentAgent?.getAdditionalDirs() ?? this.additionalDirs,
+      systemPromptContextProvider: () =>
+        prepareSystemPromptContext(
+          this.systemContextKaos(agent.kaos.getcwd()),
+          this.options.kimiHomeDir,
+          { additionalDirs: agent.getAdditionalDirs() },
+        ),
     });
+    return agent;
   }
 
   private permissionOptions(
@@ -810,6 +818,7 @@ export class Session {
     try {
       const agent = this.instantiateAgent(id, meta.homedir, meta.type, {}, parentAgentId);
       const result = await agent.resume();
+      this.restoreAgentProfileHandle(agent, meta, parent?.agent);
       this.agents.set(id, agent);
       return { agent, warning: parent?.warning ?? result.warning };
     } catch (error) {
@@ -821,6 +830,34 @@ export class Session {
     }
   }
 
+  private restoreAgentProfileHandle(
+    agent: Agent,
+    meta: AgentMeta,
+    parentAgent: Agent | undefined,
+  ): void {
+    if (agent.config.systemPrompt === '') return;
+    const profile = this.resolvePersistedProfile(agent, meta, parentAgent);
+    if (profile === undefined) return;
+    agent.setActiveProfile(profile, this.options.kimiHomeDir);
+  }
+
+  private resolvePersistedProfile(
+    agent: Agent,
+    meta: AgentMeta,
+    parentAgent: Agent | undefined,
+  ): ResolvedAgentProfile | undefined {
+    const profileName = agent.config.profileName;
+    if (profileName === undefined) return undefined;
+    if (meta.type === 'sub') {
+      const parentProfileName = parentAgent?.config.profileName;
+      return (
+        DEFAULT_AGENT_PROFILES[parentProfileName ?? 'agent']?.subagents?.[profileName] ??
+        DEFAULT_AGENT_PROFILES['agent']?.subagents?.[profileName]
+      );
+    }
+    return DEFAULT_AGENT_PROFILES[profileName];
+  }
+
   private nextGeneratedAgentId(): string {
     while (true) {
       const id = `agent-${this.agentIdCounter++}`;
diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts
index 3eceebb5b..91a04658d 100644
--- a/packages/agent-core/test/agent/compaction/full.test.ts
+++ b/packages/agent-core/test/agent/compaction/full.test.ts
@@ -228,6 +228,32 @@ describe('FullCompaction', () => {
     expect(refreshSpy).toHaveBeenCalledTimes(1);
   });
 
+  it('does not reset active tools while refreshing the system prompt after compaction', async () => {
+    const ctx = testAgent();
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.agent.useProfile({
+      name: 'tool-profile',
+      systemPrompt: () => '<profile-prompt>',
+      tools: ['Read', 'Write'],
+    });
+    ctx.agent.tools.setActiveTools(['Read']);
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
+
+    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' });
+    await ctx.rpc.beginCompaction({});
+    await ctx.once('compaction.completed');
+
+    const activeTools = ctx.agent.tools
+      .data()
+      .filter((tool) => tool.active)
+      .map((tool) => tool.name)
+      .toSorted();
+    expect(activeTools).toEqual(['Read']);
+  });
+
   it('projects the compacted prefix before sending the summary request', async () => {
     const ctx = testAgent({ compactionStrategy: alwaysCompactOnce });
     ctx.configure({
@@ -1602,6 +1628,10 @@ describe('FullCompaction', () => {
       modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
     });
     ctx.appendToolExchange();
+    let applyRecord: { compactedCount?: number; droppedCount?: number } | undefined;
+    ctx.emitter.on('context.apply_compaction', (entry) => {
+      applyRecord = (entry as { args: { compactedCount?: number; droppedCount?: number } }).args;
+    });
     const compacted = ctx.once('context.apply_compaction');
 
     await ctx.rpc.beginCompaction({});
@@ -1615,6 +1645,11 @@ describe('FullCompaction', () => {
     ]);
     expect(inputs[2]?.map((entry) => entry.split(':', 1)[0])).toEqual(['user']);
     expect(inputs[2]?.[0]).toBe('user: <compaction-instruction>');
+    // The whole 3-message history was folded (compactedCount), and all 3 were
+    // trimmed from the summarizer input on overflow (droppedCount), so the
+    // record honestly reports that the summary covers none of them.
+    expect(applyRecord?.compactedCount).toBe(3);
+    expect(applyRecord?.droppedCount).toBe(3);
     await ctx.expectResumeMatches();
   });
 
diff --git a/packages/agent-core/test/session/init.test.ts b/packages/agent-core/test/session/init.test.ts
index 89657684a..ec3148787 100644
--- a/packages/agent-core/test/session/init.test.ts
+++ b/packages/agent-core/test/session/init.test.ts
@@ -166,6 +166,53 @@ describe('Session.init', () => {
     }
   });
 
+  it('refreshes AGENTS.md from a resumed native session system prompt', async () => {
+    const workDir = await makeTempDir();
+    const sessionDir = await makeTempDir();
+    await mkdir(join(workDir, '.git'));
+    await writeFile(join(workDir, 'AGENTS.md'), 'initial resume instructions', 'utf-8');
+
+    const firstSession = new Session({
+      id: 'test-resume-system-prompt-refresh',
+      kaos: testKaos.withCwd(workDir),
+      persistenceKaos: testKaos.withCwd(workDir),
+      homedir: sessionDir,
+      rpc: createSessionRpc([]),
+      skills: { explicitDirs: [join(workDir, 'missing-skills')] },
+      providerManager: testProviderManager(),
+    });
+    try {
+      const agent = await firstSession.createMain();
+      expect(agent.config.systemPrompt).toContain('initial resume instructions');
+    } finally {
+      await firstSession.closeForReload();
+    }
+
+    await writeFile(join(workDir, 'AGENTS.md'), 'updated resume instructions', 'utf-8');
+
+    const resumedSession = new Session({
+      id: 'test-resume-system-prompt-refresh',
+      kaos: testKaos.withCwd(workDir),
+      persistenceKaos: testKaos.withCwd(workDir),
+      homedir: sessionDir,
+      rpc: createSessionRpc([]),
+      skills: { explicitDirs: [join(workDir, 'missing-skills')] },
+      providerManager: testProviderManager(),
+    });
+    try {
+      await resumedSession.resume();
+      const resumedAgent = await resumedSession.ensureAgentResumed('main');
+      expect(resumedAgent.config.systemPrompt).toContain('initial resume instructions');
+
+      await resumedAgent.refreshSystemPrompt();
+
+      expect(resumedAgent.config.systemPrompt).toContain('updated resume instructions');
+      expect(resumedAgent.config.systemPrompt).not.toContain('initial resume instructions');
+    } finally {
+      await resumedSession.close();
+    }
+  });
+
   it('rebuilds builtin tools when rebinding the session tool kaos', async () => {
     const workDir = await makeTempDir();
     const sessionDir = await makeTempDir();
diff --git a/packages/kosong/src/providers/anthropic.ts b/packages/kosong/src/providers/anthropic.ts
index 1b43abdda..07ddf0d22 100644
--- a/packages/kosong/src/providers/anthropic.ts
+++ b/packages/kosong/src/providers/anthropic.ts
@@ -392,23 +392,6 @@ function injectCacheControlOnLastBlock(messages: MessageParam[]): void {
   }
 }
 
-/**
- * Check whether a MessageParam is a user message whose content consists
- * entirely of `tool_result` blocks.
- *
- * Used to detect adjacent tool-result-only messages that must be merged
- * before hitting the Anthropic wire. Per the Messages API parallel-tool-use
- * spec, all `tool_result` blocks answering parallel `tool_use` calls must
- * live in a single user message — splitting them across consecutive user
- * messages fails on strict Anthropic-compatible backends (HTTP 400) and
- * silently degrades parallel tool use on api.anthropic.com.
- */
-function isToolResultOnly(message: MessageParam): boolean {
-  if (message.role !== 'user') return false;
-  const content = message.content;
-  if (!Array.isArray(content) || content.length === 0) return false;
-  return content.every((block) => block.type === 'tool_result');
-}
 interface AnthropicImageBlock {
   type: 'image';
   source: { type: 'base64'; data: string; media_type: string } | { type: 'url'; url: string };
@@ -1000,8 +983,15 @@ export class AnthropicChatProvider implements ChatProvider {
         ]
       : undefined;
 
-    // Convert messages, merging consecutive tool-result-only user messages
-    // into a single user message (Anthropic parallel-tool-use spec).
+    // Convert messages, merging consecutive user messages into one. Strict
+    // Anthropic-compatible backends reject consecutive user messages with HTTP
+    // 400 ("roles must alternate"), and api.anthropic.com concatenates them
+    // anyway — so merging is safe for native Anthropic and required for strict
+    // backends. This subsumes the parallel-tool-use requirement that all
+    // tool_result blocks answering parallel tool_use calls live in a single
+    // user message. Consecutive user messages arise naturally after compaction
+    // (kept user prompts + user-role summary + injected reminders) and from
+    // back-to-back system/tool messages converted to user role above.
     const messages: MessageParam[] = [];
     const normalizedHistory = normalizeToolCallIdsForProvider(
       history,
@@ -1010,7 +1000,7 @@ export class AnthropicChatProvider implements ChatProvider {
     for (const msg of normalizedHistory) {
       const converted = convertMessage(msg, this._model);
       const last = messages.at(-1);
-      if (last !== undefined && isToolResultOnly(last) && isToolResultOnly(converted)) {
+      if (last !== undefined && last.role === 'user' && converted.role === 'user') {
         last.content = [
           ...(last.content as ContentBlockParam[]),
           ...(converted.content as ContentBlockParam[]),
diff --git a/packages/protocol/src/events.ts b/packages/protocol/src/events.ts
index 190eeca8e..937ef5510 100644
--- a/packages/protocol/src/events.ts
+++ b/packages/protocol/src/events.ts
@@ -293,6 +293,13 @@ export interface CompactionResult {
    * Optional for backward compatibility with older wire records.
    */
   readonly keptUserMessageCount?: number;
+  /**
+   * Oldest messages trimmed from the summarizer input when the compaction
+   * request overflowed the model window; not covered by the produced summary.
+   * Mirrors agent-core's `CompactionResult.droppedCount`; optional for backward
+   * compatibility.
+   */
+  readonly droppedCount?: number;
 }
 
 export interface ToolUpdate {
@@ -954,6 +961,7 @@ export const compactionResultSchema = z.object({
   tokensBefore: z.number(),
   tokensAfter: z.number(),
   keptUserMessageCount: z.number().optional(),
+  droppedCount: z.number().optional(),
 }) satisfies z.ZodType<CompactionResult>;
 
 export const toolUpdateSchema = z.object({

From 5a0b3ca9f429804dd6ae446a69d09ae4cf564790 Mon Sep 17 00:00:00 2001
From: 7Sageer <7sageer@djwcb.cn>
Date: Mon, 29 Jun 2026 19:32:23 +0800
Subject: [PATCH 12/16] fix(kosong): merge only same-kind consecutive user
 messages in anthropic adapter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Strict Anthropic-compatible backends reject consecutive user messages with
HTTP 400, so the adapter collapses them — but a plain-text user turn and an
adjacent tool-result user message carry different semantics and must stay
separate. Merge plain-text with plain-text (collapsing the post-compaction
run of kept prompts + user-role summary + reminders) and tool-result with
tool-result (parallel-tool-use spec), but not across the two kinds.
---
 packages/kosong/src/providers/anthropic.ts | 41 ++++++++++++++++------
 packages/kosong/test/anthropic.test.ts     | 22 ++++++++++++
 2 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/packages/kosong/src/providers/anthropic.ts b/packages/kosong/src/providers/anthropic.ts
index 07ddf0d22..a4e8bbe29 100644
--- a/packages/kosong/src/providers/anthropic.ts
+++ b/packages/kosong/src/providers/anthropic.ts
@@ -392,6 +392,18 @@ function injectCacheControlOnLastBlock(messages: MessageParam[]): void {
   }
 }
 
+/**
+ * Whether a user MessageParam consists solely of `tool_result` blocks. Used to
+ * keep tool results bundled with each other (parallel-tool-use spec) while
+ * not merging a tool-result user message into an adjacent plain-text user
+ * message — the two carry different semantics and must stay separate.
+ */
+function isToolResultOnly(message: MessageParam): boolean {
+  if (message.role !== 'user') return false;
+  const content = message.content;
+  if (!Array.isArray(content) || content.length === 0) return false;
+  return content.every((block) => block.type === 'tool_result');
+}
 interface AnthropicImageBlock {
   type: 'image';
   source: { type: 'base64'; data: string; media_type: string } | { type: 'url'; url: string };
@@ -983,15 +995,19 @@ export class AnthropicChatProvider implements ChatProvider {
         ]
       : undefined;
 
-    // Convert messages, merging consecutive user messages into one. Strict
-    // Anthropic-compatible backends reject consecutive user messages with HTTP
-    // 400 ("roles must alternate"), and api.anthropic.com concatenates them
-    // anyway — so merging is safe for native Anthropic and required for strict
-    // backends. This subsumes the parallel-tool-use requirement that all
-    // tool_result blocks answering parallel tool_use calls live in a single
-    // user message. Consecutive user messages arise naturally after compaction
-    // (kept user prompts + user-role summary + injected reminders) and from
-    // back-to-back system/tool messages converted to user role above.
+    // Convert messages, merging consecutive user messages of the same kind into
+    // one. Strict Anthropic-compatible backends reject consecutive user messages
+    // with HTTP 400 ("roles must alternate"), and api.anthropic.com concatenates
+    // them anyway — so merging is safe for native Anthropic and required for
+    // strict backends. `isToolResultOnly` decides the kind: tool-result-only user
+    // messages merge with each other (the parallel-tool-use spec requires all
+    // tool_result blocks answering parallel tool_use calls to live in a single
+    // user message), and all other user messages (plain text or any mixed
+    // content) merge with each other. A tool-result-only message is intentionally
+    // NOT merged with the other kind: the two carry different semantics and must
+    // stay separate. Consecutive plain-text user messages arise naturally after
+    // compaction (kept user prompts + user-role summary + injected reminders) and
+    // from back-to-back system messages converted to user role above.
     const messages: MessageParam[] = [];
     const normalizedHistory = normalizeToolCallIdsForProvider(
       history,
@@ -1000,7 +1016,12 @@ export class AnthropicChatProvider implements ChatProvider {
     for (const msg of normalizedHistory) {
       const converted = convertMessage(msg, this._model);
       const last = messages.at(-1);
-      if (last !== undefined && last.role === 'user' && converted.role === 'user') {
+      if (
+        last !== undefined &&
+        last.role === 'user' &&
+        converted.role === 'user' &&
+        isToolResultOnly(last) === isToolResultOnly(converted)
+      ) {
         last.content = [
           ...(last.content as ContentBlockParam[]),
           ...(converted.content as ContentBlockParam[]),
diff --git a/packages/kosong/test/anthropic.test.ts b/packages/kosong/test/anthropic.test.ts
index 3bd9fc70a..e3a64ac1f 100644
--- a/packages/kosong/test/anthropic.test.ts
+++ b/packages/kosong/test/anthropic.test.ts
@@ -1024,6 +1024,28 @@ describe('AnthropicChatProvider', () => {
       expect(msgs[3]!.content[0]!.text).toBe('Now summarize');
     });
 
+    it('merges consecutive plain-text user messages into one', async () => {
+      const provider = createProvider();
+      const history: Message[] = [
+        { role: 'user', content: [{ type: 'text', text: 'First' }], toolCalls: [] },
+        { role: 'user', content: [{ type: 'text', text: 'Second' }], toolCalls: [] },
+        { role: 'user', content: [{ type: 'text', text: 'Third' }], toolCalls: [] },
+      ];
+      const body = await captureRequestBody(provider, '', [], history);
+
+      const msgs = body['messages'] as Array<{
+        role: string;
+        content: Array<{ type: string; text?: string }>;
+      }>;
+
+      // Strict Anthropic-compatible backends reject consecutive user messages,
+      // so back-to-back plain-text user turns (e.g. the post-compaction shape
+      // of kept prompts + user-role summary + reminders) must be collapsed.
+      expect(msgs).toHaveLength(1);
+      expect(msgs[0]!.role).toBe('user');
+      expect(msgs[0]!.content.map((block) => block.text)).toEqual(['First', 'Second', 'Third']);
+    });
+
     it('assistant with thinking (has encrypted -> ThinkingBlockParam)', async () => {
       const provider = createProvider();
       const history: Message[] = [

From be6ea51640169c30176670b7d60000be69715a97 Mon Sep 17 00:00:00 2001
From: 7Sageer <7sageer@djwcb.cn>
Date: Mon, 29 Jun 2026 21:02:00 +0800
Subject: [PATCH 13/16] fix(agent-core): restore compaction instruction
 structure

---
 .../compaction/compaction-instruction.md      | 62 +++++++++++++++++--
 .../test/agent/compaction/full.test.ts        | 46 +++++++-------
 2 files changed, 80 insertions(+), 28 deletions(-)

diff --git a/packages/agent-core/src/agent/compaction/compaction-instruction.md b/packages/agent-core/src/agent/compaction/compaction-instruction.md
index 3f4bcacd3..e9cb44121 100644
--- a/packages/agent-core/src/agent/compaction/compaction-instruction.md
+++ b/packages/agent-core/src/agent/compaction/compaction-instruction.md
@@ -1,10 +1,62 @@
 You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.
 
-Include:
-- Current progress and key decisions made
-- Important context, constraints, or user preferences
-- What remains to be done (clear next steps)
-- Any critical data, examples, or references needed to continue
+--- This message is a direct task, not part of the above conversation ---
+
+You are now given a task to compact this conversation context according to the priorities and output requirements below.
+
+The goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared.
+
+Compression priorities, in order:
+
+1. Current Task State: what is being worked on right now
+2. Errors & Solutions: unresolved or recurring errors and their resolutions
+3. Code Evolution: final working versions only; remove intermediate attempts
+4. System Context: project structure, dependencies, environment setup
+5. Design Decisions: architectural choices and their rationale
+6. TODO Items: unfinished tasks and known issues
+
+Required output structure:
+
+## Current Focus
+
+[What we're working on now]
+
+## Environment
+
+- [Key setup/config points]
+- ...
+
+## Completed Tasks
+
+- [Task]: [Brief outcome]
+- ...
+
+## Active Issues
+
+- [Issue]: [Status/Next steps]
+- ...
+
+## Code State
+
+### [Critical file name]
+
+[Brief description of the file's purpose and current state]
+
+```
+[The latest version of critical code snippets in this file, <20 lines]
+```
+
+### [Critical file name]
+
+- [Useful classes/methods/functions]: [Brief description/usage]
+- ...
+
+Omit non-critical code, intermediate attempts, and resolved errors.
+
+## Important Context
+
+- [Any crucial information not covered above]
+- ...
 
 Be concise, structured, and focused on helping the next LLM seamlessly continue the work.
 
diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts
index 91a04658d..6d2e053e4 100644
--- a/packages/agent-core/test/agent/compaction/full.test.ts
+++ b/packages/agent-core/test/agent/compaction/full.test.ts
@@ -76,10 +76,10 @@ describe('FullCompaction', () => {
       [wire] context.append_message     { "message": { "role": "user", "content": [ { "type": "text", "text": "recent user three" } ], "toolCalls": [], "origin": { "kind": "user" } }, "time": "<time>" }
       [wire] full_compaction.begin      { "source": "manual", "instruction": "Keep the important test facts.", "time": "<time>" }
       [emit] compaction.started         { "trigger": "manual", "instruction": "Keep the important test facts." }
-      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 184, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 120, "maxContextTokens": 256000, "contextUsage": 0.00046875, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 184, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 184, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 500, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 120, "maxContextTokens": 256000, "contextUsage": 0.00046875, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 500, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 500, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] context.apply_compaction   { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 119, "keptUserMessageCount": 3, "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 119, "maxContextTokens": 256000, "contextUsage": 0.00046484375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 184, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 184, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 119, "maxContextTokens": 256000, "contextUsage": 0.00046484375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 500, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 500, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete   { "time": "<time>" }
       [emit] compaction.completed       { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 119, "keptUserMessageCount": 3 } }
     `);
@@ -93,7 +93,7 @@ describe('FullCompaction', () => {
         assistant: text "old assistant two"
         user: text "recent user three"
         assistant: text "recent assistant three"
-        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history.\\n\\nKeep the important test facts."
+        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nYou are now given a task to compact this conversation context according to the priorities and output requirements below.\\n\\nThe goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared.\\n\\nCompression priorities, in order:\\n\\n1. Current Task State: what is being worked on right now\\n2. Errors & Solutions: unresolved or recurring errors and their resolutions\\n3. Code Evolution: final working versions only; remove intermediate attempts\\n4. System Context: project structure, dependencies, environment setup\\n5. Design Decisions: architectural choices and their rationale\\n6. TODO Items: unfinished tasks and known issues\\n\\nRequired output structure:\\n\\n## Current Focus\\n\\n[What we're working on now]\\n\\n## Environment\\n\\n- [Key setup/config points]\\n- ...\\n\\n## Completed Tasks\\n\\n- [Task]: [Brief outcome]\\n- ...\\n\\n## Active Issues\\n\\n- [Issue]: [Status/Next steps]\\n- ...\\n\\n## Code State\\n\\n### [Critical file name]\\n\\n[Brief description of the file's purpose and current state]\\n\\n\`\`\`\\n[The latest version of critical code snippets in this file, <20 lines]\\n\`\`\`\\n\\n### [Critical file name]\\n\\n- [Useful classes/methods/functions]: [Brief description/usage]\\n- ...\\n\\nOmit non-critical code, intermediate attempts, and resolved errors.\\n\\n## Important Context\\n\\n- [Any crucial information not covered above]\\n- ...\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history.\\n\\nKeep the important test facts."
     `);
     expect(ctx.compactHistory()).toMatchInlineSnapshot(`
       [
@@ -126,7 +126,7 @@ describe('FullCompaction', () => {
         compacted_count: 6,
         retry_count: 0,
         thinking_level: 'off',
-        input_tokens: 184,
+        input_tokens: 500,
         output_tokens: 8,
       }),
     });
@@ -946,7 +946,7 @@ describe('FullCompaction', () => {
         user: text "run both tools"
         assistant: []  calls call_open_one:LookupOne { "query": "one" }, call_open_two:LookupTwo { "query": "two" }
         tool[call_open_one]: text "one result"
-        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history.\\n\\nKeep stable facts."
+        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nYou are now given a task to compact this conversation context according to the priorities and output requirements below.\\n\\nThe goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared.\\n\\nCompression priorities, in order:\\n\\n1. Current Task State: what is being worked on right now\\n2. Errors & Solutions: unresolved or recurring errors and their resolutions\\n3. Code Evolution: final working versions only; remove intermediate attempts\\n4. System Context: project structure, dependencies, environment setup\\n5. Design Decisions: architectural choices and their rationale\\n6. TODO Items: unfinished tasks and known issues\\n\\nRequired output structure:\\n\\n## Current Focus\\n\\n[What we're working on now]\\n\\n## Environment\\n\\n- [Key setup/config points]\\n- ...\\n\\n## Completed Tasks\\n\\n- [Task]: [Brief outcome]\\n- ...\\n\\n## Active Issues\\n\\n- [Issue]: [Status/Next steps]\\n- ...\\n\\n## Code State\\n\\n### [Critical file name]\\n\\n[Brief description of the file's purpose and current state]\\n\\n\`\`\`\\n[The latest version of critical code snippets in this file, <20 lines]\\n\`\`\`\\n\\n### [Critical file name]\\n\\n- [Useful classes/methods/functions]: [Brief description/usage]\\n- ...\\n\\nOmit non-critical code, intermediate attempts, and resolved errors.\\n\\n## Important Context\\n\\n- [Any crucial information not covered above]\\n- ...\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history.\\n\\nKeep stable facts."
     `);
     // The unresolved tool exchange is sent to the model (see the compaction input
     // above) but is dropped from the replacement history, leaving only the real
@@ -996,10 +996,10 @@ describe('FullCompaction', () => {
       [wire] full_compaction.begin      { "source": "manual", "time": "<time>" }
       [emit] compaction.started         { "trigger": "manual" }
       [wire] context.append_message     { "message": { "role": "user", "content": [ { "type": "text", "text": "new user while compacting" } ], "toolCalls": [], "origin": { "kind": "user" } }, "time": "<time>" }
-      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 162, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 80, "maxContextTokens": 256000, "contextUsage": 0.0003125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 162, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 162, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 478, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 80, "maxContextTokens": 256000, "contextUsage": 0.0003125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 478, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 478, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] context.apply_compaction   { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 122, "keptUserMessageCount": 3, "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 122, "maxContextTokens": 256000, "contextUsage": 0.0004765625, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 162, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 162, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 122, "maxContextTokens": 256000, "contextUsage": 0.0004765625, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 478, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 478, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete   { "time": "<time>" }
       [emit] compaction.completed       { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 122, "keptUserMessageCount": 3 } }
     `);
@@ -1011,7 +1011,7 @@ describe('FullCompaction', () => {
         assistant: text "old assistant one"
         user: text "recent user two"
         assistant: text "recent assistant two"
-        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history."
+        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nYou are now given a task to compact this conversation context according to the priorities and output requirements below.\\n\\nThe goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared.\\n\\nCompression priorities, in order:\\n\\n1. Current Task State: what is being worked on right now\\n2. Errors & Solutions: unresolved or recurring errors and their resolutions\\n3. Code Evolution: final working versions only; remove intermediate attempts\\n4. System Context: project structure, dependencies, environment setup\\n5. Design Decisions: architectural choices and their rationale\\n6. TODO Items: unfinished tasks and known issues\\n\\nRequired output structure:\\n\\n## Current Focus\\n\\n[What we're working on now]\\n\\n## Environment\\n\\n- [Key setup/config points]\\n- ...\\n\\n## Completed Tasks\\n\\n- [Task]: [Brief outcome]\\n- ...\\n\\n## Active Issues\\n\\n- [Issue]: [Status/Next steps]\\n- ...\\n\\n## Code State\\n\\n### [Critical file name]\\n\\n[Brief description of the file's purpose and current state]\\n\\n\`\`\`\\n[The latest version of critical code snippets in this file, <20 lines]\\n\`\`\`\\n\\n### [Critical file name]\\n\\n- [Useful classes/methods/functions]: [Brief description/usage]\\n- ...\\n\\nOmit non-critical code, intermediate attempts, and resolved errors.\\n\\n## Important Context\\n\\n- [Any crucial information not covered above]\\n- ...\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history."
     `);
     expect(ctx.compactHistory()).toMatchInlineSnapshot(`
       [
@@ -1060,8 +1060,8 @@ describe('FullCompaction', () => {
       [emit] compaction.started       { "trigger": "manual" }
       [wire] context.clear            { "time": "<time>" }
       [emit] agent.status.updated     { "model": "kimi-code", "contextTokens": 0, "maxContextTokens": 256000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual" }
-      [wire] usage.record             { "model": "kimi-code", "usage": { "inputOther": 162, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated     { "model": "kimi-code", "contextTokens": 0, "maxContextTokens": 256000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 162, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 162, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record             { "model": "kimi-code", "usage": { "inputOther": 478, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated     { "model": "kimi-code", "contextTokens": 0, "maxContextTokens": 256000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 478, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 478, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.cancel   { "time": "<time>" }
       [emit] compaction.cancelled     {}
     `);
@@ -1073,7 +1073,7 @@ describe('FullCompaction', () => {
         assistant: text "old assistant one"
         user: text "recent user two"
         assistant: text "recent assistant two"
-        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history."
+        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nYou are now given a task to compact this conversation context according to the priorities and output requirements below.\\n\\nThe goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared.\\n\\nCompression priorities, in order:\\n\\n1. Current Task State: what is being worked on right now\\n2. Errors & Solutions: unresolved or recurring errors and their resolutions\\n3. Code Evolution: final working versions only; remove intermediate attempts\\n4. System Context: project structure, dependencies, environment setup\\n5. Design Decisions: architectural choices and their rationale\\n6. TODO Items: unfinished tasks and known issues\\n\\nRequired output structure:\\n\\n## Current Focus\\n\\n[What we're working on now]\\n\\n## Environment\\n\\n- [Key setup/config points]\\n- ...\\n\\n## Completed Tasks\\n\\n- [Task]: [Brief outcome]\\n- ...\\n\\n## Active Issues\\n\\n- [Issue]: [Status/Next steps]\\n- ...\\n\\n## Code State\\n\\n### [Critical file name]\\n\\n[Brief description of the file's purpose and current state]\\n\\n\`\`\`\\n[The latest version of critical code snippets in this file, <20 lines]\\n\`\`\`\\n\\n### [Critical file name]\\n\\n- [Useful classes/methods/functions]: [Brief description/usage]\\n- ...\\n\\nOmit non-critical code, intermediate attempts, and resolved errors.\\n\\n## Important Context\\n\\n- [Any crucial information not covered above]\\n- ...\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history."
     `);
     expect(ctx.compactHistory()).toMatchInlineSnapshot(`[]`);
     await ctx.expectResumeMatches();
@@ -1104,10 +1104,10 @@ describe('FullCompaction', () => {
       [wire] full_compaction.begin       { "source": "auto", "time": "<time>" }
       [emit] compaction.started          { "trigger": "auto" }
       [emit] compaction.blocked          { "turnId": 0 }
-      [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 183, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 950000, "maxContextTokens": 256000, "contextUsage": 3.7109375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 183, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 183, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 499, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 950000, "maxContextTokens": 256000, "contextUsage": 3.7109375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 499, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 499, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] context.apply_compaction    { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nAuto compacted summary.", "compactedCount": 7, "tokensBefore": 46, "tokensAfter": 127, "keptUserMessageCount": 4, "time": "<time>" }
-      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 127, "maxContextTokens": 256000, "contextUsage": 0.00049609375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 183, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 183, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 127, "maxContextTokens": 256000, "contextUsage": 0.00049609375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 499, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 499, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete    { "time": "<time>" }
       [emit] compaction.completed        { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nAuto compacted summary.", "compactedCount": 7, "tokensBefore": 46, "tokensAfter": 127, "keptUserMessageCount": 4 } }
       [wire] context.append_loop_event   { "event": { "type": "step.begin", "uuid": "<uuid-1>", "turnId": "0", "step": 1 }, "time": "<time>" }
@@ -1117,7 +1117,7 @@ describe('FullCompaction', () => {
       [wire] context.append_loop_event   { "event": { "type": "step.end", "uuid": "<uuid-1>", "turnId": "0", "step": 1, "usage": { "inputOther": 126, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "end_turn" }, "time": "<time>" }
       [emit] turn.step.completed         { "turnId": 0, "step": 1, "stepId": "<uuid-1>", "usage": { "inputOther": 126, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "end_turn" }
       [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 126, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "turn", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 137, "maxContextTokens": 256000, "contextUsage": 0.00053515625, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 309, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 309, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 126, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 137, "maxContextTokens": 256000, "contextUsage": 0.00053515625, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 625, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 625, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 126, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [emit] turn.ended                  { "turnId": 0, "reason": "completed" }
     `);
     expect(ctx.llmInputs()).toMatchInlineSnapshot(`
@@ -1132,7 +1132,7 @@ describe('FullCompaction', () => {
           user: text "recent user three"
           assistant: text "recent assistant three"
           user: text "Answer after compacting"
-          user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history."
+          user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nYou are now given a task to compact this conversation context according to the priorities and output requirements below.\\n\\nThe goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared.\\n\\nCompression priorities, in order:\\n\\n1. Current Task State: what is being worked on right now\\n2. Errors & Solutions: unresolved or recurring errors and their resolutions\\n3. Code Evolution: final working versions only; remove intermediate attempts\\n4. System Context: project structure, dependencies, environment setup\\n5. Design Decisions: architectural choices and their rationale\\n6. TODO Items: unfinished tasks and known issues\\n\\nRequired output structure:\\n\\n## Current Focus\\n\\n[What we're working on now]\\n\\n## Environment\\n\\n- [Key setup/config points]\\n- ...\\n\\n## Completed Tasks\\n\\n- [Task]: [Brief outcome]\\n- ...\\n\\n## Active Issues\\n\\n- [Issue]: [Status/Next steps]\\n- ...\\n\\n## Code State\\n\\n### [Critical file name]\\n\\n[Brief description of the file's purpose and current state]\\n\\n\`\`\`\\n[The latest version of critical code snippets in this file, <20 lines]\\n\`\`\`\\n\\n### [Critical file name]\\n\\n- [Useful classes/methods/functions]: [Brief description/usage]\\n- ...\\n\\nOmit non-critical code, intermediate attempts, and resolved errors.\\n\\n## Important Context\\n\\n- [Any crucial information not covered above]\\n- ...\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history."
 
       call 2:
         messages:
@@ -2062,10 +2062,10 @@ describe('FullCompaction', () => {
       [wire] full_compaction.begin       { "source": "auto", "time": "<time>" }
       [emit] compaction.started          { "trigger": "auto" }
       [emit] compaction.blocked          { "turnId": 0 }
-      [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 145, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 0, "maxContextTokens": 1000000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 145, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 145, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 461, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 0, "maxContextTokens": 1000000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 461, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 461, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] context.apply_compaction    { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nFirst compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 114, "keptUserMessageCount": 1, "time": "<time>" }
-      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 114, "maxContextTokens": 1000000, "contextUsage": 0.000114, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 145, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 145, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 114, "maxContextTokens": 1000000, "contextUsage": 0.000114, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 461, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 461, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete    { "time": "<time>" }
       [emit] compaction.completed        { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nFirst compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 114, "keptUserMessageCount": 1 } }
       [wire] context.append_loop_event   { "event": { "type": "step.begin", "uuid": "<uuid-1>", "turnId": "0", "step": 1 }, "time": "<time>" }
@@ -2080,7 +2080,7 @@ describe('FullCompaction', () => {
       [wire] context.append_loop_event   { "event": { "type": "step.end", "uuid": "<uuid-1>", "turnId": "0", "step": 1, "usage": { "inputOther": 115, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "tool_use" }, "time": "<time>" }
       [emit] turn.step.completed         { "turnId": 0, "step": 1, "stepId": "<uuid-1>", "usage": { "inputOther": 115, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "tool_use" }
       [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 115, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "turn", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 126, "maxContextTokens": 1000000, "contextUsage": 0.000126, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 260, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 260, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 115, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 126, "maxContextTokens": 1000000, "contextUsage": 0.000126, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 576, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 576, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 115, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [emit] turn.step.interrupted       { "turnId": 0, "step": 2, "reason": "error", "message": "Compaction limit exceeded (1)" }
       [emit] turn.ended                  { "turnId": 0, "reason": "failed", "error": { "code": "context.overflow", "message": "Compaction limit exceeded (1)", "name": "KimiError", "details": { "maxCompactions": 1, "turnId": 0 }, "retryable": true } }
     `);
@@ -2093,7 +2093,7 @@ describe('FullCompaction', () => {
         tools: []
         messages:
           user: text "Trigger repeated compaction"
-          user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\nInclude:\\n- Current progress and key decisions made\\n- Important context, constraints, or user preferences\\n- What remains to be done (clear next steps)\\n- Any critical data, examples, or references needed to continue\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history."
+          user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nYou are now given a task to compact this conversation context according to the priorities and output requirements below.\\n\\nThe goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared.\\n\\nCompression priorities, in order:\\n\\n1. Current Task State: what is being worked on right now\\n2. Errors & Solutions: unresolved or recurring errors and their resolutions\\n3. Code Evolution: final working versions only; remove intermediate attempts\\n4. System Context: project structure, dependencies, environment setup\\n5. Design Decisions: architectural choices and their rationale\\n6. TODO Items: unfinished tasks and known issues\\n\\nRequired output structure:\\n\\n## Current Focus\\n\\n[What we're working on now]\\n\\n## Environment\\n\\n- [Key setup/config points]\\n- ...\\n\\n## Completed Tasks\\n\\n- [Task]: [Brief outcome]\\n- ...\\n\\n## Active Issues\\n\\n- [Issue]: [Status/Next steps]\\n- ...\\n\\n## Code State\\n\\n### [Critical file name]\\n\\n[Brief description of the file's purpose and current state]\\n\\n\`\`\`\\n[The latest version of critical code snippets in this file, <20 lines]\\n\`\`\`\\n\\n### [Critical file name]\\n\\n- [Useful classes/methods/functions]: [Brief description/usage]\\n- ...\\n\\nOmit non-critical code, intermediate attempts, and resolved errors.\\n\\n## Important Context\\n\\n- [Any crucial information not covered above]\\n- ...\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history."
 
       call 2:
         messages:

From d58625a5a1ae0442c8b28b6c890f6d79e52ce75a Mon Sep 17 00:00:00 2001
From: 7Sageer <7sageer@djwcb.cn>
Date: Mon, 29 Jun 2026 21:14:09 +0800
Subject: [PATCH 14/16] fix(agent-core): render compaction custom instruction
 via template

---
 .../src/agent/compaction/compaction-instruction.md   |  5 +++++
 packages/agent-core/src/agent/compaction/full.ts     |  9 ++++-----
 .../agent-core/test/agent/compaction/full.test.ts    | 12 ++++++------
 packages/agent-core/test/prompt-placeholders.test.ts |  1 +
 4 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/packages/agent-core/src/agent/compaction/compaction-instruction.md b/packages/agent-core/src/agent/compaction/compaction-instruction.md
index e9cb44121..9ffd4b010 100644
--- a/packages/agent-core/src/agent/compaction/compaction-instruction.md
+++ b/packages/agent-core/src/agent/compaction/compaction-instruction.md
@@ -61,3 +61,8 @@ Omit non-critical code, intermediate attempts, and resolved errors.
 Be concise, structured, and focused on helping the next LLM seamlessly continue the work.
 
 Respond with text only. Do not call any tools — you already have everything you need in the conversation history.
+
+{% if customInstruction %}
+Optional user instruction:
+{{ customInstruction }}
+{% endif %}
diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts
index f0e1e7395..be7f73a1b 100644
--- a/packages/agent-core/src/agent/compaction/full.ts
+++ b/packages/agent-core/src/agent/compaction/full.ts
@@ -36,6 +36,7 @@ import {
   applyCompletionBudget,
   resolveCompletionBudget,
 } from '../../utils/completion-budget';
+import { renderPrompt } from '../../utils/render-prompt';
 import compactionInstructionTemplate from './compaction-instruction.md?raw';
 import type { CompactionBeginData, CompactionResult } from './types';
 import {
@@ -323,11 +324,9 @@ export class FullCompaction {
   }
 
   private buildInstruction(customInstruction: string | undefined): string {
-    const base = compactionInstructionTemplate.trimEnd();
-    if (customInstruction === undefined || customInstruction.trim().length === 0) {
-      return base;
-    }
-    return `${base}\n\n${customInstruction}`;
+    return renderPrompt(compactionInstructionTemplate, {
+      customInstruction: customInstruction?.trim() ?? '',
+    }).trimEnd();
   }
 
   private postProcessSummary(summary: string): string {
diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts
index 6d2e053e4..24df3576a 100644
--- a/packages/agent-core/test/agent/compaction/full.test.ts
+++ b/packages/agent-core/test/agent/compaction/full.test.ts
@@ -76,10 +76,10 @@ describe('FullCompaction', () => {
       [wire] context.append_message     { "message": { "role": "user", "content": [ { "type": "text", "text": "recent user three" } ], "toolCalls": [], "origin": { "kind": "user" } }, "time": "<time>" }
       [wire] full_compaction.begin      { "source": "manual", "instruction": "Keep the important test facts.", "time": "<time>" }
       [emit] compaction.started         { "trigger": "manual", "instruction": "Keep the important test facts." }
-      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 500, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 120, "maxContextTokens": 256000, "contextUsage": 0.00046875, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 500, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 500, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 507, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 120, "maxContextTokens": 256000, "contextUsage": 0.00046875, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 507, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 507, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] context.apply_compaction   { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 119, "keptUserMessageCount": 3, "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 119, "maxContextTokens": 256000, "contextUsage": 0.00046484375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 500, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 500, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 119, "maxContextTokens": 256000, "contextUsage": 0.00046484375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 507, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 507, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete   { "time": "<time>" }
       [emit] compaction.completed       { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 119, "keptUserMessageCount": 3 } }
     `);
@@ -93,7 +93,7 @@ describe('FullCompaction', () => {
         assistant: text "old assistant two"
         user: text "recent user three"
         assistant: text "recent assistant three"
-        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nYou are now given a task to compact this conversation context according to the priorities and output requirements below.\\n\\nThe goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared.\\n\\nCompression priorities, in order:\\n\\n1. Current Task State: what is being worked on right now\\n2. Errors & Solutions: unresolved or recurring errors and their resolutions\\n3. Code Evolution: final working versions only; remove intermediate attempts\\n4. System Context: project structure, dependencies, environment setup\\n5. Design Decisions: architectural choices and their rationale\\n6. TODO Items: unfinished tasks and known issues\\n\\nRequired output structure:\\n\\n## Current Focus\\n\\n[What we're working on now]\\n\\n## Environment\\n\\n- [Key setup/config points]\\n- ...\\n\\n## Completed Tasks\\n\\n- [Task]: [Brief outcome]\\n- ...\\n\\n## Active Issues\\n\\n- [Issue]: [Status/Next steps]\\n- ...\\n\\n## Code State\\n\\n### [Critical file name]\\n\\n[Brief description of the file's purpose and current state]\\n\\n\`\`\`\\n[The latest version of critical code snippets in this file, <20 lines]\\n\`\`\`\\n\\n### [Critical file name]\\n\\n- [Useful classes/methods/functions]: [Brief description/usage]\\n- ...\\n\\nOmit non-critical code, intermediate attempts, and resolved errors.\\n\\n## Important Context\\n\\n- [Any crucial information not covered above]\\n- ...\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history.\\n\\nKeep the important test facts."
+        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nYou are now given a task to compact this conversation context according to the priorities and output requirements below.\\n\\nThe goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared.\\n\\nCompression priorities, in order:\\n\\n1. Current Task State: what is being worked on right now\\n2. Errors & Solutions: unresolved or recurring errors and their resolutions\\n3. Code Evolution: final working versions only; remove intermediate attempts\\n4. System Context: project structure, dependencies, environment setup\\n5. Design Decisions: architectural choices and their rationale\\n6. TODO Items: unfinished tasks and known issues\\n\\nRequired output structure:\\n\\n## Current Focus\\n\\n[What we're working on now]\\n\\n## Environment\\n\\n- [Key setup/config points]\\n- ...\\n\\n## Completed Tasks\\n\\n- [Task]: [Brief outcome]\\n- ...\\n\\n## Active Issues\\n\\n- [Issue]: [Status/Next steps]\\n- ...\\n\\n## Code State\\n\\n### [Critical file name]\\n\\n[Brief description of the file's purpose and current state]\\n\\n\`\`\`\\n[The latest version of critical code snippets in this file, <20 lines]\\n\`\`\`\\n\\n### [Critical file name]\\n\\n- [Useful classes/methods/functions]: [Brief description/usage]\\n- ...\\n\\nOmit non-critical code, intermediate attempts, and resolved errors.\\n\\n## Important Context\\n\\n- [Any crucial information not covered above]\\n- ...\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history.\\n\\n\\nOptional user instruction:\\nKeep the important test facts."
     `);
     expect(ctx.compactHistory()).toMatchInlineSnapshot(`
       [
@@ -126,7 +126,7 @@ describe('FullCompaction', () => {
         compacted_count: 6,
         retry_count: 0,
         thinking_level: 'off',
-        input_tokens: 500,
+        input_tokens: 507,
         output_tokens: 8,
       }),
     });
@@ -946,7 +946,7 @@ describe('FullCompaction', () => {
         user: text "run both tools"
         assistant: []  calls call_open_one:LookupOne { "query": "one" }, call_open_two:LookupTwo { "query": "two" }
         tool[call_open_one]: text "one result"
-        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nYou are now given a task to compact this conversation context according to the priorities and output requirements below.\\n\\nThe goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared.\\n\\nCompression priorities, in order:\\n\\n1. Current Task State: what is being worked on right now\\n2. Errors & Solutions: unresolved or recurring errors and their resolutions\\n3. Code Evolution: final working versions only; remove intermediate attempts\\n4. System Context: project structure, dependencies, environment setup\\n5. Design Decisions: architectural choices and their rationale\\n6. TODO Items: unfinished tasks and known issues\\n\\nRequired output structure:\\n\\n## Current Focus\\n\\n[What we're working on now]\\n\\n## Environment\\n\\n- [Key setup/config points]\\n- ...\\n\\n## Completed Tasks\\n\\n- [Task]: [Brief outcome]\\n- ...\\n\\n## Active Issues\\n\\n- [Issue]: [Status/Next steps]\\n- ...\\n\\n## Code State\\n\\n### [Critical file name]\\n\\n[Brief description of the file's purpose and current state]\\n\\n\`\`\`\\n[The latest version of critical code snippets in this file, <20 lines]\\n\`\`\`\\n\\n### [Critical file name]\\n\\n- [Useful classes/methods/functions]: [Brief description/usage]\\n- ...\\n\\nOmit non-critical code, intermediate attempts, and resolved errors.\\n\\n## Important Context\\n\\n- [Any crucial information not covered above]\\n- ...\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history.\\n\\nKeep stable facts."
+        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nYou are now given a task to compact this conversation context according to the priorities and output requirements below.\\n\\nThe goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared.\\n\\nCompression priorities, in order:\\n\\n1. Current Task State: what is being worked on right now\\n2. Errors & Solutions: unresolved or recurring errors and their resolutions\\n3. Code Evolution: final working versions only; remove intermediate attempts\\n4. System Context: project structure, dependencies, environment setup\\n5. Design Decisions: architectural choices and their rationale\\n6. TODO Items: unfinished tasks and known issues\\n\\nRequired output structure:\\n\\n## Current Focus\\n\\n[What we're working on now]\\n\\n## Environment\\n\\n- [Key setup/config points]\\n- ...\\n\\n## Completed Tasks\\n\\n- [Task]: [Brief outcome]\\n- ...\\n\\n## Active Issues\\n\\n- [Issue]: [Status/Next steps]\\n- ...\\n\\n## Code State\\n\\n### [Critical file name]\\n\\n[Brief description of the file's purpose and current state]\\n\\n\`\`\`\\n[The latest version of critical code snippets in this file, <20 lines]\\n\`\`\`\\n\\n### [Critical file name]\\n\\n- [Useful classes/methods/functions]: [Brief description/usage]\\n- ...\\n\\nOmit non-critical code, intermediate attempts, and resolved errors.\\n\\n## Important Context\\n\\n- [Any crucial information not covered above]\\n- ...\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history.\\n\\n\\nOptional user instruction:\\nKeep stable facts."
     `);
     // The unresolved tool exchange is sent to the model (see the compaction input
     // above) but is dropped from the replacement history, leaving only the real
diff --git a/packages/agent-core/test/prompt-placeholders.test.ts b/packages/agent-core/test/prompt-placeholders.test.ts
index b3b506431..9566415c7 100644
--- a/packages/agent-core/test/prompt-placeholders.test.ts
+++ b/packages/agent-core/test/prompt-placeholders.test.ts
@@ -22,6 +22,7 @@ const SRC = join(import.meta.dirname, '..', 'src');
 // `.md` files rendered through `renderPrompt`. Keep in sync when a new
 // templated prompt file is introduced.
 const TEMPLATED = new Set([
+  'agent/compaction/compaction-instruction.md',
   'profile/default/system.md',
   'tools/builtin/file/read.md',
   'tools/builtin/file/read-media.md',

From 600504512d0a78398dfdfbf02d1dd4969d121eac Mon Sep 17 00:00:00 2001
From: 7Sageer <7sageer@djwcb.cn>
Date: Mon, 29 Jun 2026 21:56:14 +0800
Subject: [PATCH 15/16] fix(agent-core): harden compaction edge cases

---
 .../agent-core/src/agent/compaction/full.ts   | 71 +++++++++++++++----
 .../src/agent/compaction/memento.ts           |  3 +-
 .../test/agent/compaction/full.test.ts        | 66 +++++++++++++----
 .../test/agent/compaction/memento.test.ts     | 10 ++-
 4 files changed, 120 insertions(+), 30 deletions(-)

diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts
index be7f73a1b..3291928bf 100644
--- a/packages/agent-core/src/agent/compaction/full.ts
+++ b/packages/agent-core/src/agent/compaction/full.ts
@@ -29,6 +29,7 @@ import {
 } from '../../tools/builtin/state/todo-list';
 import {
   estimateTokens,
+  estimateTokensForMessage,
   estimateTokensForMessages,
   estimateTokensForTools,
 } from '../../utils/tokens';
@@ -376,15 +377,12 @@ export class FullCompaction {
       const delays = retryBackoffDelays(MAX_COMPACTION_RETRY_ATTEMPTS);
       let usage: TokenUsage | null = null;
       let summary: string | undefined;
-      // Compact the whole history, dropping the oldest item on overflow to
-      // preserve the prefix-cache-friendly tail. `historyForModel` is the
-      // (possibly trimmed) view sent to the model. When it is trimmed, the
-      // dropped oldest messages are not covered by the produced summary (a
-      // kept real-user message among them may still be retained verbatim, but
-      // assistant/tool messages are lost); `droppedCount` tracks how many so
-      // records and telemetry can surface the summary's blind spot honestly.
+      // Compact the whole history, trimming old messages only when the
+      // summarizer call overflows or comes back truncated/empty. Trimmed
+      // messages are absent from the summary; `droppedCount` reports that gap.
       let historyForModel = originalHistory;
       let droppedCount = 0;
+      let overflowShrinkCount = 0;
       while (true) {
         const messages = [
           ...this.agent.context.project(historyForModel),
@@ -414,14 +412,24 @@ export class FullCompaction {
           if (isContextOverflow) {
             this.observeContextOverflow(estimatedCompactionRequestTokens);
           }
-          const isOverflow =
-            isContextOverflow ||
+          if (isContextOverflow && historyForModel.length > 1) {
+            overflowShrinkCount += 1;
+            if (overflowShrinkCount > MAX_COMPACTION_OVERFLOW_SHRINK_ATTEMPTS) {
+              throw error;
+            }
+            const before = historyForModel.length;
+            historyForModel = shrinkCompactionHistoryAfterOverflow(
+              historyForModel,
+              overflowShrinkCount,
+            );
+            droppedCount += before - historyForModel.length;
+            retryCount = 0;
+            continue;
+          }
+          const shouldShrinkAfterEmptyOrTruncated =
             error instanceof CompactionTruncatedError ||
             error instanceof APIEmptyResponseError;
-          if (isOverflow && historyForModel.length > 1) {
-            // Dropping a bare `slice(1)` can strand a tool result at the front,
-            // which the provider rejects as a malformed request. Trim any
-            // leading tool results along with the oldest message.
+          if (shouldShrinkAfterEmptyOrTruncated && historyForModel.length > 1) {
             const before = historyForModel.length;
             historyForModel = dropOldestMessageAndLeadingToolResults(historyForModel);
             droppedCount += before - historyForModel.length;
@@ -527,11 +535,46 @@ export class FullCompaction {
   }
 }
 
+const MAX_COMPACTION_OVERFLOW_SHRINK_ATTEMPTS = 3;
+const COMPACTION_OVERFLOW_SHRINK_RATIOS = [0.7, 0.5, 0.35] as const;
+
+function shrinkCompactionHistoryAfterOverflow<T extends Message>(
+  messages: readonly T[],
+  attempt: number,
+): T[] {
+  if (messages.length <= 1) return messages.slice();
+  const ratio = COMPACTION_OVERFLOW_SHRINK_RATIOS[
+    Math.min(attempt - 1, COMPACTION_OVERFLOW_SHRINK_RATIOS.length - 1)
+  ]!;
+  const tokenBudget = Math.floor(estimateTokensForMessages(messages) * ratio);
+  return takeRecentMessagesWithinTokenBudget(messages, tokenBudget);
+}
+
+function takeRecentMessagesWithinTokenBudget<T extends Message>(
+  messages: readonly T[],
+  tokenBudget: number,
+): T[] {
+  let start = messages.length;
+  let tokens = 0;
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const messageTokens = estimateTokensForMessage(messages[i]!);
+    if (tokens + messageTokens > tokenBudget) break;
+    tokens += messageTokens;
+    start = i;
+  }
+  if (start === 0) start = 1;
+  return dropLeadingToolResults(messages.slice(start));
+}
+
 function dropOldestMessageAndLeadingToolResults<T extends { readonly role: string }>(
   messages: readonly T[],
 ): T[] {
   if (messages.length <= 1) return messages.slice();
-  let start = 1;
+  return dropLeadingToolResults(messages.slice(1));
+}
+
+function dropLeadingToolResults<T extends { readonly role: string }>(messages: readonly T[]): T[] {
+  let start = 0;
   while (start < messages.length && messages[start]!.role === 'tool') {
     start += 1;
   }
diff --git a/packages/agent-core/src/agent/compaction/memento.ts b/packages/agent-core/src/agent/compaction/memento.ts
index db6329169..061f50f05 100644
--- a/packages/agent-core/src/agent/compaction/memento.ts
+++ b/packages/agent-core/src/agent/compaction/memento.ts
@@ -77,8 +77,7 @@ function extractText(content: readonly ContentPart[]): string {
 }
 
 export function isCompactionSummaryMessage(message: MessageLike): boolean {
-  if (message.origin?.kind === 'compaction_summary') return true;
-  return extractText(message.content).startsWith(`${COMPACTION_SUMMARY_PREFIX}\n`);
+  return message.origin?.kind === 'compaction_summary';
 }
 
 /**
diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts
index 24df3576a..566bba98f 100644
--- a/packages/agent-core/test/agent/compaction/full.test.ts
+++ b/packages/agent-core/test/agent/compaction/full.test.ts
@@ -1613,12 +1613,10 @@ describe('FullCompaction', () => {
 
   it('does not leave an orphan tool result at the start when reducing overflowing compaction input', async () => {
     const inputs: string[][] = [];
-    let callCount = 0;
     const generate: GenerateFn = async (_provider, _system, _tools, history) => {
-      callCount += 1;
       inputs.push(inputHistorySnapshot(history));
-      if (callCount <= 2) {
-        throw new APIContextOverflowError(400, 'Context length exceeded', `req-compact-overflow-${String(callCount)}`);
+      if (inputs.length === 1) {
+        throw new APIContextOverflowError(400, 'Context length exceeded', 'req-compact-overflow');
       }
       return textResult('Reduced tool history summary.');
     };
@@ -1633,18 +1631,15 @@ describe('FullCompaction', () => {
       applyRecord = (entry as { args: { compactedCount?: number; droppedCount?: number } }).args;
     });
     const compacted = ctx.once('context.apply_compaction');
+    const completed = ctx.once('compaction.completed');
 
     await ctx.rpc.beginCompaction({});
     await compacted;
+    await completed;
 
-    expect(inputs).toHaveLength(3);
-    expect(inputs[1]?.map((entry) => entry.split(':', 1)[0])).toEqual([
-      'assistant',
-      'tool',
-      'user',
-    ]);
-    expect(inputs[2]?.map((entry) => entry.split(':', 1)[0])).toEqual(['user']);
-    expect(inputs[2]?.[0]).toBe('user: <compaction-instruction>');
+    expect(inputs).toHaveLength(2);
+    const reducedHistory = inputs[1]!.slice(0, -1);
+    expect(reducedHistory[0]?.split(':', 1)[0]).not.toBe('tool');
     // The whole 3-message history was folded (compactedCount), and all 3 were
     // trimmed from the summarizer input on overflow (droppedCount), so the
     // record honestly reports that the summary covers none of them.
@@ -1653,6 +1648,53 @@ describe('FullCompaction', () => {
     await ctx.expectResumeMatches();
   });
 
+  it('shrinks overflowing compaction input aggressively instead of one message at a time', async () => {
+    const inputs: string[][] = [];
+    let applyRecord: { compactedCount?: number; droppedCount?: number } | undefined;
+    const generate: GenerateFn = async (_provider, _system, _tools, history) => {
+      inputs.push(inputHistorySnapshot(history));
+      const compactedHistory = history.slice(0, -1);
+      if (compactedHistory.length > 20) {
+        throw new APIContextOverflowError(
+          400,
+          'Context length exceeded',
+          `req-long-compact-${String(inputs.length)}`,
+        );
+      }
+      return textResult('Aggressively reduced summary.');
+    };
+    const ctx = testAgent({ generate });
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    for (let i = 0; i < 30; i++) {
+      ctx.appendExchange(
+        i,
+        `old user ${String(i)} ${'u'.repeat(400)}`,
+        `old assistant ${String(i)} ${'a'.repeat(400)}`,
+        10,
+      );
+    }
+    ctx.emitter.on('context.apply_compaction', (entry) => {
+      applyRecord = (entry as { args: { compactedCount?: number; droppedCount?: number } }).args;
+    });
+    const compacted = ctx.once('context.apply_compaction');
+    const completed = ctx.once('compaction.completed');
+
+    await ctx.rpc.beginCompaction({});
+    await compacted;
+    await completed;
+
+    expect(inputs[0]?.length).toBeGreaterThan(50);
+    expect(inputs.length).toBeLessThanOrEqual(4);
+    const finalCompactedHistory = inputs.at(-1)!.slice(0, -1);
+    expect(finalCompactedHistory[0]?.split(':', 1)[0]).not.toBe('tool');
+    expect(applyRecord?.compactedCount).toBe(60);
+    expect(applyRecord?.droppedCount).toBeGreaterThan(0);
+    await ctx.expectResumeMatches();
+  });
+
   it('recovers from plain 413 when estimated request is over effective max', async () => {
     let callCount = 0;
     const generate: GenerateFn = async (_provider, _system, _tools, _history, callbacks) => {
diff --git a/packages/agent-core/test/agent/compaction/memento.test.ts b/packages/agent-core/test/agent/compaction/memento.test.ts
index 61b703eb3..912f247a1 100644
--- a/packages/agent-core/test/agent/compaction/memento.test.ts
+++ b/packages/agent-core/test/agent/compaction/memento.test.ts
@@ -103,8 +103,14 @@ describe('isCompactionSummaryMessage', () => {
     expect(isCompactionSummaryMessage(message)).toBe(true);
   });
 
-  it('detects the summary prefix', () => {
-    expect(isCompactionSummaryMessage(textMessage('user', `${COMPACTION_SUMMARY_PREFIX}\nsummary`))).toBe(true);
+  it('keeps real user prompts even when they start with the summary prefix', () => {
+    const message = {
+      ...textMessage('user', `${COMPACTION_SUMMARY_PREFIX}\nsummary`),
+      origin: { kind: 'user' as const },
+    };
+
+    expect(isCompactionSummaryMessage(message)).toBe(false);
+    expect(collectCompactableUserMessages([message])).toEqual([message]);
   });
 
   it('ignores ordinary user messages', () => {

From 4baee9836710b10e94079d5df3e047ab2f07ad8b Mon Sep 17 00:00:00 2001
From: 7Sageer <7sageer@djwcb.cn>
Date: Mon, 29 Jun 2026 22:42:39 +0800
Subject: [PATCH 16/16] fix(agent-core): avoid leaking compaction summary
 prefix

---
 apps/vis/server/src/lib/context-projector.ts  | 12 ++++-
 .../server/test/lib/context-projector.test.ts | 21 ++++++++
 .../agent-core/src/agent/compaction/full.ts   | 10 ++--
 .../agent-core/src/agent/compaction/types.ts  | 10 +++-
 .../agent-core/src/agent/context/index.ts     | 10 ++--
 .../test/agent/compaction/full.test.ts        | 52 +++++++++++++++----
 6 files changed, 96 insertions(+), 19 deletions(-)

diff --git a/apps/vis/server/src/lib/context-projector.ts b/apps/vis/server/src/lib/context-projector.ts
index 9e3722455..40cd5810b 100644
--- a/apps/vis/server/src/lib/context-projector.ts
+++ b/apps/vis/server/src/lib/context-projector.ts
@@ -266,6 +266,16 @@ export function projectContext(
             tokensAfter: rec.tokensAfter,
           },
         };
+        const modelSummaryBubble: ProjectedMessage =
+          rec.contextSummary === undefined
+            ? summaryBubble
+            : {
+                ...summaryBubble,
+                message: {
+                  ...summaryBubble.message,
+                  content: [{ type: 'text', text: rec.contextSummary }],
+                } as ContextMessage,
+              };
         if (mode === 'model') {
           // Rebuild the model's-eye view as the kept user messages + summary.
           // `realUserEntries` is filtered with the exact
@@ -289,7 +299,7 @@ export function projectContext(
             const original = realUserEntries[suffixStart + i]!;
             return original.message === message ? original : { ...original, message };
           });
-          messages = [...keptEntries, summaryBubble];
+          messages = [...keptEntries, modelSummaryBubble];
         } else {
           // Full history: keep ALL preceding messages, just append the summary
           // marker inline so the compacted prefix stays visible.
diff --git a/apps/vis/server/test/lib/context-projector.test.ts b/apps/vis/server/test/lib/context-projector.test.ts
index 176c53a8b..fa21b0789 100644
--- a/apps/vis/server/test/lib/context-projector.test.ts
+++ b/apps/vis/server/test/lib/context-projector.test.ts
@@ -275,6 +275,27 @@ describe('context-projector', () => {
     expect(proj.messages[2]!.message.content[0]).toMatchObject({ text: 'new' });
   });
 
+  it('uses contextSummary only for the model view and raw summary for full history', () => {
+    const entries = [
+      { lineNo: 1, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'old' }], toolCalls: [] } }, raw: {} },
+      { lineNo: 2, data: { type: 'context.apply_compaction' as const,
+          summary: 'raw summary', contextSummary: 'prefixed summary', compactedCount: 1, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
+    ];
+
+    const model = projectContext(entries as any);
+    expect(model.messages.map((m) => m.message.content[0])).toMatchObject([
+      { text: 'old' },
+      { text: 'prefixed summary' },
+    ]);
+
+    const full = projectContext(entries as any, 'full');
+    expect(full.messages.map((m) => m.message.content[0])).toMatchObject([
+      { text: 'old' },
+      { text: 'raw summary' },
+    ]);
+  });
+
   it('apply_compaction keeps the most recent user messages and drops the assistant/tool tail', () => {
     const entries = [
       { lineNo: 1, data: { type: 'context.append_message' as const,
diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts
index 3291928bf..84a9c3502 100644
--- a/packages/agent-core/src/agent/compaction/full.ts
+++ b/packages/agent-core/src/agent/compaction/full.ts
@@ -306,7 +306,9 @@ export class FullCompaction {
       } catch (error) {
         this.agent.log.error('failed to refresh system prompt after compaction', { error });
       }
-      this.agent.emitEvent({ type: 'compaction.completed', result });
+      const { contextSummary: _contextSummary, ...eventResult } = result;
+      void _contextSummary;
+      this.agent.emitEvent({ type: 'compaction.completed', result: eventResult });
       await this.agent.injection.injectAfterCompaction();
       this.triggerPostCompactHook(data, result);
     } catch (error) {
@@ -460,9 +462,11 @@ export class FullCompaction {
         }
       }
 
-      const summaryText = buildCompactionSummaryText(this.postProcessSummary(summary ?? ''));
+      const rawSummary = this.postProcessSummary(summary ?? '');
+      const contextSummary = buildCompactionSummaryText(rawSummary);
       const result = this.agent.context.applyCompaction({
-        summary: summaryText,
+        summary: rawSummary,
+        contextSummary,
         compactedCount: originalHistory.length,
         tokensBefore,
         droppedCount: droppedCount === 0 ? undefined : droppedCount,
diff --git a/packages/agent-core/src/agent/compaction/types.ts b/packages/agent-core/src/agent/compaction/types.ts
index 80be2263c..cef3c5308 100644
--- a/packages/agent-core/src/agent/compaction/types.ts
+++ b/packages/agent-core/src/agent/compaction/types.ts
@@ -1,5 +1,13 @@
 export interface CompactionResult {
+  /** Human-facing summary text produced by the compaction model. */
   summary: string;
+  /**
+   * Exact summary message stored in the live model context. It includes the
+   * compaction prefix that tells the next model this is handoff context rather
+   * than a real user prompt. Optional for backward compatibility with older
+   * wire records, where `summary` was also the model-context text.
+   */
+  contextSummary?: string;
   compactedCount: number;
   tokensBefore: number;
   tokensAfter: number;
@@ -31,7 +39,7 @@ export interface CompactionResult {
  * historical values are preserved verbatim.
  */
 export type CompactionInput = Pick<CompactionResult, 'summary' | 'compactedCount' | 'tokensBefore'> &
-  Partial<Pick<CompactionResult, 'tokensAfter' | 'keptUserMessageCount' | 'droppedCount'>>;
+  Partial<Pick<CompactionResult, 'contextSummary' | 'tokensAfter' | 'keptUserMessageCount' | 'droppedCount'>>;
 
 export type CompactionSource = 'manual' | 'auto';
 
diff --git a/packages/agent-core/src/agent/context/index.ts b/packages/agent-core/src/agent/context/index.ts
index 8e67fff30..eab7337e0 100644
--- a/packages/agent-core/src/agent/context/index.ts
+++ b/packages/agent-core/src/agent/context/index.ts
@@ -226,13 +226,16 @@ export class ContextMemory {
     );
     // Live compaction omits these so they are derived from the actual
     // `_history`; restore passes the persisted record so its historical values
-    // are preserved verbatim.
+    // are preserved verbatim. Older wire records did not have `contextSummary`,
+    // so their `summary` remains the model-context text during restore.
+    const contextSummary = input.contextSummary ?? input.summary;
     const tokensAfter =
       input.tokensAfter ??
-      estimateTokens(input.summary) + estimateTokensForMessages(keptUserMessages);
+      estimateTokens(contextSummary) + estimateTokensForMessages(keptUserMessages);
     const keptUserMessageCount = input.keptUserMessageCount ?? keptUserMessages.length;
     const result: CompactionResult = {
       summary: input.summary,
+      contextSummary,
       compactedCount: input.compactedCount,
       tokensBefore: input.tokensBefore,
       tokensAfter,
@@ -246,6 +249,7 @@ export class ContextMemory {
     this.agent.replayBuilder.patchLast('compaction', {
       result: {
         summary: result.summary,
+        contextSummary: result.contextSummary,
         compactedCount: result.compactedCount,
         tokensBefore: result.tokensBefore,
         tokensAfter: result.tokensAfter,
@@ -257,7 +261,7 @@ export class ContextMemory {
       ...keptUserMessages,
       {
         role: 'user',
-        content: [{ type: 'text', text: result.summary }],
+        content: [{ type: 'text', text: contextSummary }],
         toolCalls: [],
         origin: { kind: 'compaction_summary' },
       },
diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts
index 566bba98f..284c48cdd 100644
--- a/packages/agent-core/test/agent/compaction/full.test.ts
+++ b/packages/agent-core/test/agent/compaction/full.test.ts
@@ -78,10 +78,10 @@ describe('FullCompaction', () => {
       [emit] compaction.started         { "trigger": "manual", "instruction": "Keep the important test facts." }
       [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 507, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
       [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 120, "maxContextTokens": 256000, "contextUsage": 0.00046875, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 507, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 507, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction   { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 119, "keptUserMessageCount": 3, "time": "<time>" }
+      [wire] context.apply_compaction   { "summary": "Compacted summary.", "contextSummary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 119, "keptUserMessageCount": 3, "time": "<time>" }
       [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 119, "maxContextTokens": 256000, "contextUsage": 0.00046484375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 507, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 507, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete   { "time": "<time>" }
-      [emit] compaction.completed       { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 119, "keptUserMessageCount": 3 } }
+      [emit] compaction.completed       { "result": { "summary": "Compacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 119, "keptUserMessageCount": 3 } }
     `);
     expect(ctx.lastLlmInput()).toMatchInlineSnapshot(`
       system: <system-prompt>
@@ -133,6 +133,34 @@ describe('FullCompaction', () => {
     await ctx.expectResumeMatches();
   });
 
+  it('emits the raw summary while keeping the prefixed summary in model context', async () => {
+    const ctx = testAgent();
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
+    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' });
+
+    // Subscribe before triggering compaction so the completion event cannot fire unobserved.
+    const completed = ctx.once('compaction.completed');
+    await ctx.rpc.beginCompaction({});
+    await completed;
+
+    const completedEvent = ctx.allEvents.find((entry) => entry.event === 'compaction.completed');
+    // The emitted result exposes the raw summary and omits the model-context copy.
+    expect(completedEvent?.args).toEqual({
+      result: expect.objectContaining({ summary: 'Compacted summary.' }),
+    });
+    expect(completedEvent?.args).not.toEqual({
+      result: expect.objectContaining({ contextSummary: expect.anything() }),
+    });
+    // The live history still carries the prefixed handoff text for the next model.
+    expect(ctx.agent.context.history.at(-1)?.content).toEqual([
+      { type: 'text', text: `${COMPACTION_SUMMARY_PREFIX}\nCompacted summary.` },
+    ]);
+  });
+
   it('keeps only real user input and re-injects permission reminders after compaction', async () => {
     const ctx = testAgent();
     ctx.configure({
@@ -200,12 +228,14 @@ describe('FullCompaction', () => {
       keptUserMessageCount?: number;
       tokensAfter?: number;
       summary?: string;
+      contextSummary?: string;
     };
     expect(record.keptUserMessageCount).toBe(2);
-    const expectedSummary = `${COMPACTION_SUMMARY_PREFIX}\nCompacted summary.`;
-    expect(record.summary).toBe(expectedSummary);
+    const expectedContextSummary = `${COMPACTION_SUMMARY_PREFIX}\nCompacted summary.`;
+    expect(record.summary).toBe('Compacted summary.');
+    expect(record.contextSummary).toBe(expectedContextSummary);
     expect(record.tokensAfter).toBe(
-      estimateTokens(expectedSummary) +
+      estimateTokens(expectedContextSummary) +
         estimateTokensForMessages(ctx.agent.context.history.slice(0, 2)),
     );
   });
@@ -998,10 +1028,10 @@ describe('FullCompaction', () => {
       [wire] context.append_message     { "message": { "role": "user", "content": [ { "type": "text", "text": "new user while compacting" } ], "toolCalls": [], "origin": { "kind": "user" } }, "time": "<time>" }
       [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 478, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
       [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 80, "maxContextTokens": 256000, "contextUsage": 0.0003125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 478, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 478, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction   { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 122, "keptUserMessageCount": 3, "time": "<time>" }
+      [wire] context.apply_compaction   { "summary": "Compacted prefix.", "contextSummary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 122, "keptUserMessageCount": 3, "time": "<time>" }
       [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 122, "maxContextTokens": 256000, "contextUsage": 0.0004765625, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 478, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 478, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete   { "time": "<time>" }
-      [emit] compaction.completed       { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 122, "keptUserMessageCount": 3 } }
+      [emit] compaction.completed       { "result": { "summary": "Compacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 122, "keptUserMessageCount": 3 } }
     `);
     expect(ctx.lastLlmInput()).toMatchInlineSnapshot(`
       system: <system-prompt>
@@ -1106,10 +1136,10 @@ describe('FullCompaction', () => {
       [emit] compaction.blocked          { "turnId": 0 }
       [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 499, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
       [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 950000, "maxContextTokens": 256000, "contextUsage": 3.7109375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 499, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 499, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction    { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nAuto compacted summary.", "compactedCount": 7, "tokensBefore": 46, "tokensAfter": 127, "keptUserMessageCount": 4, "time": "<time>" }
+      [wire] context.apply_compaction    { "summary": "Auto compacted summary.", "contextSummary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nAuto compacted summary.", "compactedCount": 7, "tokensBefore": 46, "tokensAfter": 127, "keptUserMessageCount": 4, "time": "<time>" }
       [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 127, "maxContextTokens": 256000, "contextUsage": 0.00049609375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 499, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 499, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete    { "time": "<time>" }
-      [emit] compaction.completed        { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nAuto compacted summary.", "compactedCount": 7, "tokensBefore": 46, "tokensAfter": 127, "keptUserMessageCount": 4 } }
+      [emit] compaction.completed        { "result": { "summary": "Auto compacted summary.", "compactedCount": 7, "tokensBefore": 46, "tokensAfter": 127, "keptUserMessageCount": 4 } }
       [wire] context.append_loop_event   { "event": { "type": "step.begin", "uuid": "<uuid-1>", "turnId": "0", "step": 1 }, "time": "<time>" }
       [emit] turn.step.started           { "turnId": 0, "step": 1, "stepId": "<uuid-1>" }
       [emit] assistant.delta             { "turnId": 0, "delta": "I can answer after compaction." }
@@ -2106,10 +2136,10 @@ describe('FullCompaction', () => {
       [emit] compaction.blocked          { "turnId": 0 }
       [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 461, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
       [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 0, "maxContextTokens": 1000000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 461, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 461, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction    { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nFirst compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 114, "keptUserMessageCount": 1, "time": "<time>" }
+      [wire] context.apply_compaction    { "summary": "First compacted summary.", "contextSummary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nFirst compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 114, "keptUserMessageCount": 1, "time": "<time>" }
       [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 114, "maxContextTokens": 1000000, "contextUsage": 0.000114, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 461, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 461, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete    { "time": "<time>" }
-      [emit] compaction.completed        { "result": { "summary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nFirst compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 114, "keptUserMessageCount": 1 } }
+      [emit] compaction.completed        { "result": { "summary": "First compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 114, "keptUserMessageCount": 1 } }
       [wire] context.append_loop_event   { "event": { "type": "step.begin", "uuid": "<uuid-1>", "turnId": "0", "step": 1 }, "time": "<time>" }
       [emit] turn.step.started           { "turnId": 0, "step": 1, "stepId": "<uuid-1>" }
       [emit] assistant.delta             { "turnId": 0, "delta": "I need a tool." }