diff --git a/.changeset/compaction-internals-cleanup.md b/.changeset/compaction-internals-cleanup.md
new file mode 100644
index 000000000..6d5bc7280
--- /dev/null
+++ b/.changeset/compaction-internals-cleanup.md
@@ -0,0 +1,5 @@
+---
+"@moonshot-ai/kimi-code": patch
+---
+
+Tighten compaction bookkeeping so compacted history stays consistent across retries and resume.
diff --git a/.changeset/rework-compaction-strategy.md b/.changeset/rework-compaction-strategy.md
new file mode 100644
index 000000000..6b42303d2
--- /dev/null
+++ b/.changeset/rework-compaction-strategy.md
@@ -0,0 +1,5 @@
+---
+"@moonshot-ai/kimi-code": minor
+---
+
+Rework conversation compaction to keep only the most recent real user prompts (within a token budget) followed by a user-role summary, dropping assistant and tool messages.
diff --git a/apps/vis/server/src/lib/context-projector.ts b/apps/vis/server/src/lib/context-projector.ts
index fd7a376e6..40cd5810b 100644
--- a/apps/vis/server/src/lib/context-projector.ts
+++ b/apps/vis/server/src/lib/context-projector.ts
@@ -1,3 +1,9 @@
+import {
+  COMPACT_USER_MESSAGE_MAX_TOKENS,
+  collectCompactableUserMessages,
+  isRealUserInput,
+  selectRecentUserMessages,
+} from '@moonshot-ai/agent-core';
 import type {
   ContentPart,
   ContextMessage,
@@ -234,19 +240,21 @@ export function projectContext(
         break;
       case 'context.apply_compaction': {
         openSteps = new Map();
-        // Mirror agent-core's actual `applyCompaction` behaviour
-        // (`packages/agent-core/src/agent/context/index.ts`): history becomes
-        // `[summaryBubble, ...history.slice(compactedCount)]`. The summary is
-        // an *assistant* message tagged `origin.kind = 'compaction_summary'`
-        // (using 'system' would skew role counts and any downstream diff
-        // against agent-core history). The post-compaction tail is preserved
-        // rather than dropped, so messages still in context stay visible.
+        // Mirror agent-core's `applyCompaction`
+        // (`packages/agent-core/src/agent/context/index.ts`): the live history
+        // becomes the most recent real user messages (verbatim, within a token
+        // budget) followed by a single user-role summary tagged
+        // `origin.kind = 'compaction_summary'`. Assistant messages, tool calls,
+        // and tool results are dropped. The selection rule
+        // (`selectRecentUserMessages` / `collectCompactableUserMessages`) is the
+        // same helper agent-core's `ContextMemory` and the web transcript
+        // reducer apply, so all three views stay in sync.
         const summaryBubble: ProjectedMessage = {
           lineNo: entry.lineNo,
           time: rec.time,
           source: 'compaction_summary',
           message: {
-            role: 'assistant',
+            role: 'user',
             content: [{ type: 'text', text: rec.summary }],
             toolCalls: [],
             origin: { kind: 'compaction_summary' },
@@ -258,34 +266,49 @@ export function projectContext(
             tokensAfter: rec.tokensAfter,
           },
         };
+        const modelSummaryBubble: ProjectedMessage =
+          rec.contextSummary === undefined
+            ? summaryBubble
+            : {
+                ...summaryBubble,
+                message: {
+                  ...summaryBubble.message,
+                  content: [{ type: 'text', text: rec.contextSummary }],
+                } as ContextMessage,
+              };
         if (mode === 'model') {
-          // Drop the first `rec.compactedCount` HISTORY entries (NOT array
-          // entries): agent-core's `compactedCount` indexes into `_history`,
-          // which never contains our synthetic 'undo'/'clear' markers. Walk the
-          // array counting only history entries (`isHistoryEntry`) until
-          // `compactedCount` are passed, then slice there — any UI-only markers
-          // in the dropped region go with it (correct: they precede the
-          // compaction). With no markers this is exactly `slice(compactedCount)`.
-          let sliceAt = messages.length;
-          let passed = 0;
-          for (let i = 0; i < messages.length; i++) {
-            if (passed >= rec.compactedCount) {
-              sliceAt = i;
-              break;
-            }
-            if (isHistoryEntry(messages[i]!)) passed++;
-          }
-          if (passed < rec.compactedCount) sliceAt = messages.length;
-          messages = [summaryBubble, ...messages.slice(sliceAt)];
+          // Rebuild the model's-eye view as the kept user messages + summary.
+          // `realUserEntries` is filtered with the exact
+          // `collectCompactableUserMessages` predicate so it stays aligned with
+          // the selection below (genuine user input only — no injections,
+          // system triggers, or prior summaries). `selectRecentUserMessages`
+          // keeps a contiguous suffix of that subsequence, with only the oldest
+          // kept message possibly truncated, so each kept message maps back onto
+          // its original ProjectedMessage wrapper (preserving line/time); we swap
+          // in the (possibly truncated) message object.
+          const historyEntries = messages.filter(isHistoryEntry);
+          const realUserEntries = historyEntries.filter(
+            (pm) => collectCompactableUserMessages([pm.message]).length === 1,
+          );
+          const keptUserMessages = selectRecentUserMessages(
+            realUserEntries.map((pm) => pm.message),
+            COMPACT_USER_MESSAGE_MAX_TOKENS,
+          );
+          const suffixStart = realUserEntries.length - keptUserMessages.length;
+          const keptEntries: ProjectedMessage[] = keptUserMessages.map((message, i) => {
+            const original = realUserEntries[suffixStart + i]!;
+            return original.message === message ? original : { ...original, message };
+          });
+          messages = [...keptEntries, modelSummaryBubble];
         } else {
           // Full history: keep ALL preceding messages, just append the summary
           // marker inline so the compacted prefix stays visible.
           messages.push(summaryBubble);
         }
         // Mirror agent-core applyCompaction() → microCompaction.reset() (cutoff
-        // → 0): the message list is rebuilt as [summary, ...tail], so the old
-        // index-based cutoff no longer points at the same messages. (In full
-        // mode the blanking pass does not run, so this is a no-op there.)
+        // → 0): the message list is rebuilt, so the old index-based cutoff no
+        // longer points at the same messages. (In full mode the blanking pass
+        // does not run, so this is a no-op there.)
         microCutoff = 0;
         // Mirror agent-core applyCompaction() → _tokenCount = result.tokensAfter:
         // the live context-window fill is now the post-compaction count. Derived
@@ -577,16 +600,6 @@ function isHistoryEntry(pm: ProjectedMessage): boolean {
   return pm.source !== 'undo' && pm.source !== 'clear';
 }
 
-/** Mirrors agent-core `isRealUserPrompt` (`agent/context/index.ts`): a message
- *  counts toward an undo only if it is a genuine user prompt. */
-function isRealUserPrompt(message: ContextMessage): boolean {
-  if (message.role !== 'user') return false;
-  const origin = message.origin;
-  if (origin === undefined || origin.kind === 'user') return true;
-  if (origin.kind === 'skill_activation') return origin.trigger === 'user-slash';
-  return false;
-}
-
 /** Single source of truth for the `context.undo` backward walk, shared by both
  *  projection modes. Mirrors agent-core `undo` (`agent/context/index.ts`): walk
  *  from the end, skip `origin.kind === 'injection'` (those are KEPT even when
@@ -612,7 +625,7 @@ function computeUndoCutoff(
     if (origin?.kind === 'compaction_summary') break; // stop
     removedMessageCount++;
     cutoff = i;
-    if (isRealUserPrompt(messages[i]!.message) && ++removedUserCount >= count) break;
+    if (isRealUserInput(messages[i]!.message) && ++removedUserCount >= count) break;
   }
   return { cutoff, removedMessageCount };
 }
diff --git a/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl b/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl
index 317df60b2..9f44d9a7d 100644
--- a/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl
+++ b/apps/vis/server/test/fixtures/sessions/sample-compaction/agents/main/wire.jsonl
@@ -1,5 +1,6 @@
 {"type":"metadata","protocol_version":"1.1","created_at":1779256791085}
 {"type":"config.update","cwd":"/tmp/work","profileName":"agent","systemPrompt":"You are Kimi.","time":1779256791100}
 {"type":"context.append_message","message":{"role":"user","content":[{"type":"text","text":"before compaction"}],"toolCalls":[]},"time":1779256800001}
-{"type":"context.apply_compaction","summary":"compacted summary","compactedCount":1,"tokensBefore":100,"tokensAfter":30,"time":1779256800500}
+{"type":"context.append_message","message":{"role":"assistant","content":[{"type":"text","text":"assistant reply"}],"toolCalls":[]},"time":1779256800200}
+{"type":"context.apply_compaction","summary":"compacted summary","compactedCount":2,"tokensBefore":100,"tokensAfter":30,"time":1779256800500}
 {"type":"context.append_message","message":{"role":"user","content":[{"type":"text","text":"after compaction"}],"toolCalls":[]},"time":1779256801000}
diff --git a/apps/vis/server/test/lib/context-projector.test.ts b/apps/vis/server/test/lib/context-projector.test.ts
index d2a2d3f4c..fa21b0789 100644
--- a/apps/vis/server/test/lib/context-projector.test.ts
+++ b/apps/vis/server/test/lib/context-projector.test.ts
@@ -262,33 +262,100 @@ describe('context-projector', () => {
       { lineNo: 4, data: { type: 'context.append_message' as const, message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'new' }], toolCalls: [] } }, raw: {} },
     ];
     const proj = projectContext(entries as any);
-    expect(proj.messages[0]!.source).toBe('compaction_summary');
-    // Compaction summary is an assistant message (agent-core's own
+    // Model view: the kept user prompt + user-role summary + the new prompt.
+    expect(proj.messages.map((m) => m.source)).toEqual([
+      'append_message', 'compaction_summary', 'append_message',
+    ]);
+    expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'old' });
+    // The compaction summary is a user message (agent-core's own
     // representation), not a synthetic system message.
-    expect(proj.messages[0]!.message.role).toBe('assistant');
-    expect(proj.messages[0]!.message.origin).toEqual({ kind: 'compaction_summary' });
-    expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'old stuff' });
-    expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'new' });
+    expect(proj.messages[1]!.message.role).toBe('user');
+    expect(proj.messages[1]!.message.origin).toEqual({ kind: 'compaction_summary' });
+    expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'old stuff' });
+    expect(proj.messages[2]!.message.content[0]).toMatchObject({ text: 'new' });
+  });
+
+  it('uses contextSummary only for the model view and raw summary for full history', () => {
+    const entries = [
+      { lineNo: 1, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'old' }], toolCalls: [] } }, raw: {} },
+      { lineNo: 2, data: { type: 'context.apply_compaction' as const,
+          summary: 'raw summary', contextSummary: 'prefixed summary', compactedCount: 1, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
+    ];
+
+    const model = projectContext(entries as any);
+    expect(model.messages.map((m) => m.message.content[0])).toMatchObject([
+      { text: 'old' },
+      { text: 'prefixed summary' },
+    ]);
+
+    const full = projectContext(entries as any, 'full');
+    expect(full.messages.map((m) => m.message.content[0])).toMatchObject([
+      { text: 'old' },
+      { text: 'raw summary' },
+    ]);
   });
 
-  it('apply_compaction keeps the post-compaction tail (slice(compactedCount))', () => {
+  it('apply_compaction keeps the most recent user messages and drops the assistant/tool tail', () => {
     const entries = [
       { lineNo: 1, data: { type: 'context.append_message' as const,
           message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'm0' }], toolCalls: [] } }, raw: {} },
       { lineNo: 2, data: { type: 'context.append_message' as const,
           message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'm1' }], toolCalls: [] } }, raw: {} },
       { lineNo: 3, data: { type: 'context.append_message' as const,
-          message: { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'm2 (kept)' }], toolCalls: [] } }, raw: {} },
+          message: { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'm2 (dropped)' }], toolCalls: [] } }, raw: {} },
       { lineNo: 4, data: { type: 'context.apply_compaction' as const,
-          summary: 'sum', compactedCount: 2, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
+          summary: 'sum', compactedCount: 3, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
     ];
     const proj = projectContext(entries as any);
-    // [summary, m2] — m0 and m1 (the first compactedCount=2) are dropped, m2 kept.
-    expect(proj.messages).toHaveLength(2);
-    expect(proj.messages[0]!.source).toBe('compaction_summary');
-    expect(proj.messages[0]!.compaction).toEqual({ compactedCount: 2, tokensBefore: 100, tokensAfter: 10 });
-    expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'm2 (kept)' });
-    expect(proj.messages[1]!.lineNo).toBe(3);
+    // [m0, m1, summary] — real user prompts are kept verbatim, the assistant
+    // tail is dropped.
+    expect(proj.messages).toHaveLength(3);
+    expect(proj.messages.map((m) => m.source)).toEqual([
+      'append_message', 'append_message', 'compaction_summary',
+    ]);
+    expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'm0' });
+    expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'm1' });
+    expect(proj.messages[2]!.compaction).toEqual({ compactedCount: 3, tokensBefore: 100, tokensAfter: 10 });
+    expect(proj.messages[2]!.message.content[0]).toMatchObject({ text: 'sum' });
+  });
+
+  it('apply_compaction drops shell/local-command/background messages in model mode only', () => {
+    const entries = [
+      { lineNo: 1, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'real user' }], toolCalls: [], origin: { kind: 'user' as const } } }, raw: {} },
+      { lineNo: 2, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: '! pwd' }], toolCalls: [], origin: { kind: 'shell_command' as const, phase: 'input' as const } } }, raw: {} },
+      { lineNo: 3, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'local output' }], toolCalls: [], origin: { kind: 'injection' as const, variant: 'local-command-stdout' } } }, raw: {} },
+      { lineNo: 4, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'background done' }], toolCalls: [], origin: { kind: 'background_task' as const, taskId: 'task', status: 'completed' as const, notificationId: 'notification' } } }, raw: {} },
+      { lineNo: 5, data: { type: 'context.append_message' as const,
+          message: { role: 'assistant' as const, content: [{ type: 'text' as const, text: 'assistant reply' }], toolCalls: [] } }, raw: {} },
+      { lineNo: 6, data: { type: 'context.apply_compaction' as const,
+          summary: 'sum', compactedCount: 5, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
+      { lineNo: 7, data: { type: 'context.append_message' as const,
+          message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'new' }], toolCalls: [], origin: { kind: 'user' as const } } }, raw: {} },
+    ];
+
+    const model = projectContext(entries as any);
+    expect(model.messages.map((m) => m.source)).toEqual([
+      'append_message', 'compaction_summary', 'append_message',
+    ]);
+    expect(model.messages.map((m) => m.message.content[0])).toMatchObject([
+      { text: 'real user' }, { text: 'sum' }, { text: 'new' },
+    ]);
+
+    const full = projectContext(entries as any, 'full');
+    expect(full.messages.map((m) => m.source)).toEqual([
+      'append_message', 'append_message', 'append_message', 'append_message',
+      'append_message', 'compaction_summary', 'append_message',
+    ]);
+    expect(full.messages.map((m) => m.message.content[0])).toMatchObject([
+      { text: 'real user' }, { text: '! pwd' }, { text: 'local output' },
+      { text: 'background done' }, { text: 'assistant reply' }, { text: 'sum' },
+      { text: 'new' },
+    ]);
   });
 
   // ---- Fix ④: UI-only markers must not offset agent-core history indices ------
@@ -298,7 +365,7 @@ describe('context-projector', () => {
   // real history entries (append_message + compaction_summary), skipping
   // 'undo'/'clear' markers.
 
-  it('apply_compaction slices by history index, skipping a preceding undo marker (model)', () => {
+  it('apply_compaction keeps user messages across a preceding undo marker (model)', () => {
     const userMsg = (text: string) => ({
       role: 'user' as const, content: [{ type: 'text' as const, text }], toolCalls: [],
       origin: { kind: 'user' as const },
@@ -306,14 +373,10 @@ describe('context-projector', () => {
     // Step 1: append u1, u2 then undo(1) → removes u2, leaves [u1, <undo marker>].
     // Step 2: append u3, u4 → array is [u1, <undo marker>, u3, u4].
     // History entries (agent-core _history, which has NO marker) are the three
-    // real messages [u1, u3, u4]. A compaction with compactedCount=2 drops the
-    // first 2 HISTORY entries (u1, u3) — and the undo marker that sits within
-    // that compacted prefix is dropped with it — keeping exactly [summary, u4].
-    //
-    // The naive `messages.slice(compactedCount=2)` would instead cut the ARRAY at
-    // index 2, yielding [summary, u3, u4] — it WRONGLY retains the already-
-    // compacted u3 because the undo marker offset the index by one. This test
-    // pins the correct history-aware behaviour and FAILS against the naive slice.
+    // real user prompts [u1, u3, u4]. Compaction keeps all of them (they fit the
+    // budget) and appends the summary, dropping only the synthetic undo marker.
+    // This pins that the marker does not offset the kept-user selection — a naive
+    // array-slice would have retained the wrong prompts.
     const entries = [
       { lineNo: 1, data: { type: 'context.append_message' as const, message: userMsg('u1') }, raw: {} },
       { lineNo: 2, data: { type: 'context.append_message' as const, message: userMsg('u2') }, raw: {} },
@@ -321,12 +384,16 @@ describe('context-projector', () => {
       { lineNo: 4, data: { type: 'context.append_message' as const, message: userMsg('u3') }, raw: {} },
       { lineNo: 5, data: { type: 'context.append_message' as const, message: userMsg('u4') }, raw: {} },
       { lineNo: 6, data: { type: 'context.apply_compaction' as const,
-          summary: 'sum', compactedCount: 2, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
+          summary: 'sum', compactedCount: 3, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
     ];
     const proj = projectContext(entries as any);
-    // Correct: [summary, u4]. The marker and the first 2 history entries are gone.
-    expect(proj.messages.map((m) => m.source)).toEqual(['compaction_summary', 'append_message']);
-    expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'u4' });
+    // Correct: [u1, u3, u4, summary]. The marker is gone, all real prompts kept.
+    expect(proj.messages.map((m) => m.source)).toEqual([
+      'append_message', 'append_message', 'append_message', 'compaction_summary',
+    ]);
+    expect(proj.messages.map((m) => m.message.content[0])).toMatchObject([
+      { text: 'u1' }, { text: 'u3' }, { text: 'u4' }, { text: 'sum' },
+    ]);
   });
 
   it('micro-blanking uses the history index, skipping a preceding undo marker (model)', () => {
@@ -675,7 +742,7 @@ describe('context-projector', () => {
   // marker but do NOT mutate/drop the surrounding message list. 'model' mode
   // (the default) keeps the existing model's-eye behaviour byte-identical.
 
-  it("defaults to 'model' mode when no 2nd arg is passed (compaction drops the prefix)", () => {
+  it("defaults to 'model' mode when no 2nd arg is passed (keeps recent user messages + summary)", () => {
     const entries = [
       { lineNo: 1, data: { type: 'context.append_message' as const,
           message: { role: 'user' as const, content: [{ type: 'text' as const, text: 'm0' }], toolCalls: [] } }, raw: {} },
@@ -684,10 +751,14 @@ describe('context-projector', () => {
       { lineNo: 3, data: { type: 'context.apply_compaction' as const,
           summary: 'sum', compactedCount: 2, tokensBefore: 100, tokensAfter: 10 }, raw: {} },
     ];
-    // No 2nd arg → 'model' default: prefix dropped, only the summary remains.
+    // No 2nd arg → 'model' default: the real user prompts are kept verbatim and
+    // the summary is appended after them.
     const proj = projectContext(entries as any);
-    expect(proj.messages).toHaveLength(1);
-    expect(proj.messages[0]!.source).toBe('compaction_summary');
+    expect(proj.messages.map((m) => m.source)).toEqual([
+      'append_message', 'append_message', 'compaction_summary',
+    ]);
+    expect(proj.messages[0]!.message.content[0]).toMatchObject({ text: 'm0' });
+    expect(proj.messages[1]!.message.content[0]).toMatchObject({ text: 'm1' });
   });
 
   it("full mode keeps the pre-compaction messages plus the summary marker plus the tail", () => {
diff --git a/apps/vis/server/test/routes/context.test.ts b/apps/vis/server/test/routes/context.test.ts
index 486e6175d..6352747e9 100644
--- a/apps/vis/server/test/routes/context.test.ts
+++ b/apps/vis/server/test/routes/context.test.ts
@@ -69,28 +69,31 @@ describe('context route', () => {
     cleanup = c;
     const app = contextRoute(home);
 
-    // Default (model view): the pre-compaction message is dropped, leaving
-    // [summary, after-compaction].
+    // Default (model view): the real user prompt before compaction is KEPT, the
+    // assistant reply is dropped, then the summary, then the post-compaction tail.
     const modelRes = await app.request('/session_fixture/context?agent=main');
     expect(modelRes.status).toBe(200);
     const modelBody = (await modelRes.json()) as {
       messages: { source: string; message: { content: { type: string; text?: string }[] } }[];
     };
     expect(modelBody.messages.map((m) => m.source)).toEqual([
-      'compaction_summary', 'append_message',
+      'append_message', 'compaction_summary', 'append_message',
     ]);
+    expect(modelBody.messages[0]!.message.content[0]).toMatchObject({ text: 'before compaction' });
+    expect(modelBody.messages[2]!.message.content[0]).toMatchObject({ text: 'after compaction' });
 
-    // Full history: the pre-compaction message is KEPT, then the summary marker,
-    // then the post-compaction tail.
+    // Full history: every pre-compaction message (user prompt + assistant reply)
+    // is KEPT, then the summary marker, then the post-compaction tail.
     const fullRes = await app.request('/session_fixture/context?agent=main&history=full');
     expect(fullRes.status).toBe(200);
     const fullBody = (await fullRes.json()) as {
       messages: { source: string; message: { content: { type: string; text?: string }[] } }[];
     };
     expect(fullBody.messages.map((m) => m.source)).toEqual([
-      'append_message', 'compaction_summary', 'append_message',
+      'append_message', 'append_message', 'compaction_summary', 'append_message',
     ]);
     expect(fullBody.messages[0]!.message.content[0]).toMatchObject({ text: 'before compaction' });
-    expect(fullBody.messages[2]!.message.content[0]).toMatchObject({ text: 'after compaction' });
+    expect(fullBody.messages[1]!.message.content[0]).toMatchObject({ text: 'assistant reply' });
+    expect(fullBody.messages[3]!.message.content[0]).toMatchObject({ text: 'after compaction' });
   });
 });
diff --git a/packages/agent-core/src/agent/compaction/compaction-instruction.md b/packages/agent-core/src/agent/compaction/compaction-instruction.md
index 49b0d80b4..9ffd4b010 100644
--- a/packages/agent-core/src/agent/compaction/compaction-instruction.md
+++ b/packages/agent-core/src/agent/compaction/compaction-instruction.md
@@ -1,24 +1,21 @@
+You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.
 
 --- This message is a direct task, not part of the above conversation ---
 
-You are now given a task to compact this conversation context according to specific priorities and output requirements.
+You are now given a task to compact this conversation context according to the priorities and output requirements below.
 
-Output text only. DO NOT CALL ANY TOOLS. Calling tools will be rejected and fails the task. You already have all the information you need in the conversation history. You have only one chance.
+The goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared.
 
-The goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared work.
+Compression priorities, in order:
 
-{{ customInstruction }}
-
-<!-- Compression Priorities (in order) -->
-
-1. **Current Task State**: What is being worked on RIGHT NOW
-2. **Errors & Solutions**: All encountered errors and their resolutions
-3. **Code Evolution**: Final working versions only (remove intermediate attempts)
-4. **System Context**: Project structure, dependencies, environment setup
-5. **Design Decisions**: Architectural choices and their rationale
-6. **TODO Items**: Unfinished tasks and known issues
+1. Current Task State: what is being worked on right now
+2. Errors & Solutions: unresolved or recurring errors and their resolutions
+3. Code Evolution: final working versions only; remove intermediate attempts
+4. System Context: project structure, dependencies, environment setup
+5. Design Decisions: architectural choices and their rationale
+6. TODO Items: unfinished tasks and known issues
 
-<!-- Required Output Structure -->
+Required output structure:
 
 ## Current Focus
 
@@ -54,16 +51,18 @@ The goal of compaction is to keep essential code patterns, technical details, an
 - [Useful classes/methods/functions]: [Brief description/usage]
 - ...
 
-<!-- Omit non-critical code, intermediate attempts, and resolved errors -->
+Omit non-critical code, intermediate attempts, and resolved errors.
 
 ## Important Context
 
 - [Any crucial information not covered above]
 - ...
 
-## All User Messages
+Be concise, structured, and focused on helping the next LLM seamlessly continue the work.
 
-- [Detailed non tool use user message]
-- ...
+Respond with text only. Do not call any tools — you already have everything you need in the conversation history.
 
-<!-- Must output a summary matching the above template in the **final answer**, not in thinking. -->
+{% if customInstruction %}
+Optional user instruction:
+{{ customInstruction }}
+{% endif %}
diff --git a/packages/agent-core/src/agent/compaction/compaction-summary-prefix.md b/packages/agent-core/src/agent/compaction/compaction-summary-prefix.md
new file mode 100644
index 000000000..62a7161b8
--- /dev/null
+++ b/packages/agent-core/src/agent/compaction/compaction-summary-prefix.md
@@ -0,0 +1 @@
+Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model; use the information in this summary to assist with your own analysis:
\ No newline at end of file
diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts
index ebd8bfe18..84a9c3502 100644
--- a/packages/agent-core/src/agent/compaction/full.ts
+++ b/packages/agent-core/src/agent/compaction/full.ts
@@ -22,9 +22,14 @@ import {
   retryBackoffDelays,
   sleepForRetry,
 } from '../../loop/retry';
-import { renderPrompt } from '../../utils/render-prompt';
+import {
+  renderTodoList,
+  TODO_STORE_KEY,
+  type TodoItem,
+} from '../../tools/builtin/state/todo-list';
 import {
   estimateTokens,
+  estimateTokensForMessage,
   estimateTokensForMessages,
   estimateTokensForTools,
 } from '../../utils/tokens';
@@ -32,14 +37,15 @@ import {
   applyCompletionBudget,
   resolveCompletionBudget,
 } from '../../utils/completion-budget';
+import { renderPrompt } from '../../utils/render-prompt';
 import compactionInstructionTemplate from './compaction-instruction.md?raw';
-import { renderTodoList, type TodoItem } from '../../tools/builtin/state/todo-list';
 import type { CompactionBeginData, CompactionResult } from './types';
 import {
   DEFAULT_COMPACTION_CONFIG,
   DefaultCompactionStrategy,
   type CompactionStrategy,
 } from './strategy';
+import { buildCompactionSummaryText } from './memento';
 
 export const MAX_COMPACTION_RETRY_ATTEMPTS = 5;
 
@@ -62,6 +68,18 @@ export class FullCompaction {
     blockedByTurn: boolean;
   } | null = null;
   private readonly observedMaxContextTokensByModel = new Map<string, number>();
+  // Token count right after the last successful compaction. While no new
+  // content has been appended (tokenCountWithPending <= this value), the
+  // history is already in its minimal compacted form ([kept user prompts,
+  // summary]); re-compacting would only nest summaries, so
+  // checkAutoCompaction skips in that case even if an observed overflow
+  // limit still flags the context as oversized.
+  private lastCompactedTokenCount: number | null = null;
+  // Counts provider-overflow recoveries in this turn that have not yet been
+  // followed by a successful step. Exceeding the strategy's
+  // `maxOverflowCompactionAttempts` stops an overflow -> compact -> overflow
+  // loop once compaction can no longer fit the request in the model window.
+  private consecutiveOverflowCompactions = 0;
   protected readonly strategy: CompactionStrategy;
 
   constructor(
@@ -77,7 +95,7 @@ export class FullCompaction {
           reservedContextSize:
             agent.kimiConfig?.loopControl?.reservedContextSize ??
             DEFAULT_COMPACTION_CONFIG.reservedContextSize,
-        }
+        },
       );
   }
 
@@ -139,9 +157,8 @@ export class FullCompaction {
       });
       return;
     }
-    const compactedCount = this.strategy.computeCompactCount(this.agent.context.history, data.source);
-    if (compactedCount === 0) {
-      throw new KimiError(ErrorCodes.COMPACTION_UNABLE, 'No prefix that can be compacted in current history.');
+    if (this.agent.context.history.length === 0) {
+      throw new KimiError(ErrorCodes.COMPACTION_UNABLE, 'No messages to compact in current history.');
     }
     this.agent.records.logRecord({
       type: 'full_compaction.begin',
@@ -155,7 +172,7 @@ export class FullCompaction {
     const abortController = new AbortController();
     this.compacting = {
       abortController,
-      promise: this.compactionWorker(abortController.signal, data, compactedCount),
+      promise: this.compactionWorker(abortController.signal, data),
       blockedByTurn: false,
     };
   }
@@ -194,9 +211,20 @@ export class FullCompaction {
 
   resetForTurn(): void {
     this.compactionCountInTurn = 0;
+    this.lastCompactedTokenCount = null; // clear the post-compaction baseline that checkAutoCompaction compares against
+    this.consecutiveOverflowCompactions = 0; // restart the overflow -> compact loop guard
   }
 
   async handleOverflowError(signal: AbortSignal, error: unknown) {
+    this.consecutiveOverflowCompactions += 1;
+    const maxAttempts = this.strategy.maxOverflowCompactionAttempts;
+    if (this.consecutiveOverflowCompactions > maxAttempts) {
+      throw new KimiError(
+        ErrorCodes.CONTEXT_OVERFLOW,
+        `Compaction failed to bring the context under the model window after ${String(maxAttempts)} attempts.`,
+        { cause: error instanceof Error ? error : undefined },
+      );
+    }
     const didStartCompaction = this.beginAutoCompaction();
     if (!didStartCompaction && !this.compacting) throw error;
     // Always block on overflow errors
@@ -211,6 +239,10 @@ export class FullCompaction {
   }
 
   async afterStep(): Promise<void> {
+    // A completed step means a generate() succeeded, so any prior
+    // overflow -> compact cycle produced a request that now fits; clear the
+    // loop guard.
+    this.consecutiveOverflowCompactions = 0;
     if (this.strategy.checkAfterStep) {
       this.checkAutoCompaction(false);
     }
@@ -219,6 +251,12 @@ export class FullCompaction {
 
   private checkAutoCompaction(throwOnLimit: boolean = true): boolean {
     if (this.compacting) return true;
+    if (
+      this.lastCompactedTokenCount !== null &&
+      this.tokenCountWithPending <= this.lastCompactedTokenCount
+    ) {
+      return false; // no growth since the last successful compaction: skip instead of summarising a summary
+    }
     if (!this.strategy.shouldCompact(this.tokenCountWithPending)) return false;
     return this.beginAutoCompaction(throwOnLimit);
   }
@@ -258,34 +296,21 @@ export class FullCompaction {
   private async compactionWorker(
     signal: AbortSignal,
     data: Readonly<CompactionBeginData>,
-    compactedCount: number,
   ): Promise<void> {
     try {
-      const finalResult = {
-        summary: '',
-        compactedCount: 1,
-        tokensBefore: 0,
-        tokensAfter: 0,
-      };
-
-      for (let round = 1; ; round++) {
-        const result = await this.compactionRound(round, signal, data, compactedCount);
-        if (!result) return;
-
-        finalResult.summary = result.summary;
-        finalResult.compactedCount += result.compactedCount - 1;
-        finalResult.tokensBefore += result.tokensBefore - finalResult.tokensAfter;
-        finalResult.tokensAfter = result.tokensAfter;
-
-        if (result.tokensBefore - result.tokensAfter < 1024) break;
-        if (!this.strategy.shouldBlock(result.tokensAfter)) break;
-        compactedCount = this.strategy.computeCompactCount(this.agent.context.history, data.source);
-        if (compactedCount === 0) break;
-      }
+      const result = await this.compactionRound(signal, data);
+      if (!result) return;
       this.markCompleted();
-      this.agent.emitEvent({ type: 'compaction.completed', result: finalResult });
-      await this.agent.injection.injectGoal();
-      this.triggerPostCompactHook(data, finalResult);
+      try {
+        await this.agent.refreshSystemPrompt();
+      } catch (error) {
+        this.agent.log.error('failed to refresh system prompt after compaction', { error });
+      }
+      const { contextSummary: _contextSummary, ...eventResult } = result;
+      void _contextSummary;
+      this.agent.emitEvent({ type: 'compaction.completed', result: eventResult });
+      await this.agent.injection.injectAfterCompaction();
+      this.triggerPostCompactHook(data, result);
     } catch (error) {
       if (isAbortError(error)) return;
       const blockedByTurn = this.compacting?.blockedByTurn === true;
@@ -301,19 +326,31 @@ export class FullCompaction {
     }
   }
 
+  private buildInstruction(customInstruction: string | undefined): string {
+    // A missing instruction renders as ''; trailing whitespace of the rendered prompt is stripped.
+    const trimmedInstruction = customInstruction?.trim() ?? '';
+    return renderPrompt(compactionInstructionTemplate, { customInstruction: trimmedInstruction }).trimEnd();
+  }
+
+  private postProcessSummary(summary: string): string {
+    // With no stored todos the summary passes through untouched; otherwise it is trimmed
+    // and the output of renderTodoList(todos, '## TODO List') is appended after a blank line.
+    const stored = this.agent.tools.storeData()[TODO_STORE_KEY] as readonly TodoItem[] | undefined;
+    const todos = stored ?? [];
+    if (todos.length === 0) return summary;
+    const todoSection = renderTodoList(todos, '## TODO List');
+    return `${summary.trim()}\n\n${todoSection}`;
+  }
+
   private async compactionRound(
-    round: number,
     signal: AbortSignal,
     data: Readonly<CompactionBeginData>,
-    initialCompactedCount: number,
-  ) {
+  ): Promise<CompactionResult | undefined> {
     const startedAt = Date.now();
     const originalHistory = [...this.agent.context.history];
     const tokensBefore = estimateTokensForMessages(originalHistory);
     let retryCount = 0;
     try {
-      let compactedCount = initialCompactedCount;
-
       await this.triggerPreCompactHook(data, tokensBefore, signal);
 
       const model = this.agent.config.model;
@@ -337,15 +374,21 @@ export class FullCompaction {
         }),
         capability,
       });
+      const instruction = this.buildInstruction(data.instruction);
 
       const delays = retryBackoffDelays(MAX_COMPACTION_RETRY_ATTEMPTS);
-      let usage: TokenUsage | null;
-      let summary: string;
+      let usage: TokenUsage | null = null;
+      let summary: string | undefined;
+      // Compact the whole history, trimming old messages only when the
+      // summarizer request itself cannot fit. Any trimmed messages are not
+      // covered by the produced summary; `droppedCount` reports that blind spot.
+      let historyForModel = originalHistory;
+      let droppedCount = 0;
+      let overflowShrinkCount = 0;
       while (true) {
-        const messagesToCompact = originalHistory.slice(0, compactedCount);
         const messages = [
-          ...this.agent.context.project(messagesToCompact),
-          createUserMessage(renderPrompt(compactionInstructionTemplate, { customInstruction: data.instruction ?? '' })),
+          ...this.agent.context.project(historyForModel),
+          createUserMessage(instruction),
         ];
         const estimatedCompactionRequestTokens = this.estimateRequestTokens(messages);
         try {
@@ -371,14 +414,31 @@ export class FullCompaction {
           if (isContextOverflow) {
             this.observeContextOverflow(estimatedCompactionRequestTokens);
           }
-          if (
-            isContextOverflow ||
+          if (isContextOverflow && historyForModel.length > 1) {
+            overflowShrinkCount += 1;
+            if (overflowShrinkCount > MAX_COMPACTION_OVERFLOW_SHRINK_ATTEMPTS) {
+              throw error;
+            }
+            const before = historyForModel.length;
+            historyForModel = shrinkCompactionHistoryAfterOverflow(
+              historyForModel,
+              overflowShrinkCount,
+            );
+            droppedCount += before - historyForModel.length;
+            retryCount = 0;
+            continue;
+          }
+          const shouldShrinkAfterEmptyOrTruncated =
             error instanceof CompactionTruncatedError ||
-            error instanceof APIEmptyResponseError // e.g. think-only
-          ) {
-            compactedCount = this.strategy.reduceCompactOnOverflow(messagesToCompact);
+            error instanceof APIEmptyResponseError;
+          if (shouldShrinkAfterEmptyOrTruncated && historyForModel.length > 1) {
+            const before = historyForModel.length;
+            historyForModel = dropOldestMessageAndLeadingToolResults(historyForModel);
+            droppedCount += before - historyForModel.length;
+            retryCount = 0;
+            continue;
           }
-          else if (!isRetryableGenerateError(error)) {
+          if (!isRetryableGenerateError(error)) {
             throw error;
           }
           if (retryCount + 1 >= MAX_COMPACTION_RETRY_ATTEMPTS) {
@@ -402,17 +462,15 @@ export class FullCompaction {
         }
       }
 
-      summary = this.postProcessSummary(summary);
-
-      const recent = originalHistory.slice(compactedCount);
-      const tokensAfter = estimateTokens(summary) + estimateTokensForMessages(recent);
-
-      const result: CompactionResult = {
-        summary,
-        compactedCount,
+      const rawSummary = this.postProcessSummary(summary ?? '');
+      const contextSummary = buildCompactionSummaryText(rawSummary);
+      const result = this.agent.context.applyCompaction({
+        summary: rawSummary,
+        contextSummary,
+        compactedCount: originalHistory.length,
         tokensBefore,
-        tokensAfter,
-      };
+        droppedCount: droppedCount === 0 ? undefined : droppedCount,
+      });
 
       // Telemetry keys are snake_case, but the `context.apply_compaction`
       // record written below keeps its persisted camelCase field names
@@ -424,22 +482,23 @@ export class FullCompaction {
         tokens_after: result.tokensAfter,
         duration_ms: Date.now() - startedAt,
         compacted_count: result.compactedCount,
+        dropped_count: result.droppedCount,
         retry_count: retryCount,
-        round,
+        round: 1,
         thinking_level: this.agent.config.thinkingLevel,
         ...(usage === null
           ? {}
           : { input_tokens: inputTotal(usage), output_tokens: usage.output }),
       });
-      this.agent.context.applyCompaction(result);
+      this.lastCompactedTokenCount = result.tokensAfter;
       return result;
     } catch (error) {
-      if (isAbortError(error)) return;
+      if (isAbortError(error)) return undefined;
       this.agent.telemetry.track('compaction_failed', {
         source: data.source,
         tokens_before: tokensBefore,
         duration_ms: Date.now() - startedAt,
-        round,
+        round: 1,
         retry_count: retryCount,
         thinking_level: this.agent.config.thinkingLevel,
         error_type: error instanceof Error ? error.name : 'Unknown',
@@ -478,16 +537,52 @@ export class FullCompaction {
       },
     });
   }
+}
 
-  private postProcessSummary(summary: string): string {
-    const storeData = this.agent.tools.storeData();
-    const todos = (storeData['todo'] as readonly TodoItem[] | undefined) ?? [];
-    if (todos.length === 0) {
-      return summary;
-    }
-    const todoMarkdown = renderTodoList(todos, '## TODO List');
-    return `${summary.trim()}\n\n${todoMarkdown}`;
+const MAX_COMPACTION_OVERFLOW_SHRINK_ATTEMPTS = 3;
+const COMPACTION_OVERFLOW_SHRINK_RATIOS = [0.7, 0.5, 0.35] as const;
+
+function shrinkCompactionHistoryAfterOverflow<T extends Message>(
+  messages: readonly T[],
+  attempt: number,
+): T[] {
+  // `attempt` is 1-based; attempts beyond the ratio table reuse its last entry.
+  if (messages.length <= 1) return messages.slice();
+  const lastRatioIndex = COMPACTION_OVERFLOW_SHRINK_RATIOS.length - 1;
+  const ratio = COMPACTION_OVERFLOW_SHRINK_RATIOS[Math.min(attempt - 1, lastRatioIndex)]!;
+  const budget = Math.floor(estimateTokensForMessages(messages) * ratio);
+  return takeRecentMessagesWithinTokenBudget(messages, budget);
+}
+
+function takeRecentMessagesWithinTokenBudget<T extends Message>(
+  messages: readonly T[],
+  tokenBudget: number,
+): T[] {
+  // Walk newest -> oldest; `start` ends on the oldest message that still fits `tokenBudget`.
+  let start = messages.length;
+  for (let i = messages.length - 1, tokens = 0; i >= 0; i--) {
+    tokens += estimateTokensForMessage(messages[i]!);
+    if (tokens > tokenBudget) break;
+    start = i;
+  }
+  if (start === 0) start = 1; // everything fits: still shed at least the oldest message
+  const kept = dropLeadingToolResults(messages.slice(start));
+  return kept.length > 0 ? kept : messages.slice(-1); // never hand the summarizer an empty history
+}
+
+function dropOldestMessageAndLeadingToolResults<T extends { readonly role: string }>(
+  messages: readonly T[],
+): T[] {
+  const rest = messages.length <= 1 ? messages.slice() : dropLeadingToolResults(messages.slice(1));
+  return rest.length > 0 ? rest : messages.slice(-1); // all-tool tail: keep the newest message rather than returning []
+}
+
+function dropLeadingToolResults<T extends { readonly role: string }>(messages: readonly T[]): T[] {
+  let start = 0; // index of the first message whose role is not 'tool'
+  while (start < messages.length && messages[start]!.role === 'tool') {
+    start += 1;
   }
+  return messages.slice(start); // fresh array; [] when every message is a tool result
 }
 
 function extractCompactionSummary(response: GenerateResult): string {
diff --git a/packages/agent-core/src/agent/compaction/index.ts b/packages/agent-core/src/agent/compaction/index.ts
index 4f92ac9fe..4e209f83b 100644
--- a/packages/agent-core/src/agent/compaction/index.ts
+++ b/packages/agent-core/src/agent/compaction/index.ts
@@ -2,3 +2,4 @@ export * from './full';
 export * from './micro';
 export * from './strategy';
 export * from './types';
+export * from './memento';
diff --git a/packages/agent-core/src/agent/compaction/memento.ts b/packages/agent-core/src/agent/compaction/memento.ts
new file mode 100644
index 000000000..061f50f05
--- /dev/null
+++ b/packages/agent-core/src/agent/compaction/memento.ts
@@ -0,0 +1,162 @@
+import type { ContentPart } from '@moonshot-ai/kosong';
+import { estimateTokensForMessage } from '../../utils/tokens';
+import type { PromptOrigin } from '../context/types';
+import summaryPrefixTemplate from './compaction-summary-prefix.md?raw';
+
+/**
+ * "Memento" compaction helpers.
+ *
+ * Compaction rewrites the model context as: the most recent user messages
+ * (verbatim, within a token budget) followed by a single user-role summary
+ * that is prefixed with `COMPACTION_SUMMARY_PREFIX`. Assistant messages,
+ * tool calls, and tool results are dropped. These helpers apply the exact
+ * same rule for both the live context rewrite and the transcript reducer.
+ */
+
+export const COMPACTION_SUMMARY_PREFIX = summaryPrefixTemplate.trimEnd();
+export const COMPACT_USER_MESSAGE_MAX_TOKENS = 20_000;
+
+/**
+ * Structural subset of kosong's `Message` that the memento helpers inspect.
+ * Both `ContextMessage` (the live context) and the wire-transcript reducer's
+ * mutable message satisfy this shape, so one set of helpers serves both
+ * layers without introducing a shared nominal type. `origin` is what tells
+ * real user input apart from injections and compaction summaries.
+ */
+interface MessageLike {
+  readonly role: string; // only 'user' passes isRealUserInput
+  readonly content: readonly ContentPart[]; // extractText reads the text parts and ignores the rest
+  readonly origin?: PromptOrigin | undefined; // classified by compactionUserMessageDisposition
+}
+
+export type CompactionUserDisposition = 'keep' | 'drop'; // verdict returned by compactionUserMessageDisposition
+
+/**
+ * Single source of truth for whether a user-role message survives compaction as
+ * genuine user input. Codex-style semantics: only real user prompts and
+ * user-slash skill activations are kept verbatim. Everything else user-role is
+ * either rebuilt by injectors after compaction or intentionally ephemeral, so
+ * it is dropped from the live context even when transcript/replay retains it
+ * for UI rendering. New `PromptOrigin` kinds must update this switch.
+ */
+export function compactionUserMessageDisposition(
+  origin: PromptOrigin | undefined,
+): CompactionUserDisposition {
+  if (origin === undefined) return 'keep'; // a message with no recorded origin is kept
+  switch (origin.kind) {
+    case 'user':
+      return 'keep';
+    case 'skill_activation':
+      return origin.trigger === 'user-slash' ? 'keep' : 'drop'; // any other trigger value is dropped
+    case 'injection':
+    case 'shell_command':
+    case 'compaction_summary':
+    case 'system_trigger':
+    case 'background_task':
+    case 'cron_job':
+    case 'cron_missed':
+    case 'hook_result':
+    case 'retry':
+      return 'drop';
+    default: {
+      const _exhaustive: never = origin; // compile-time check: fails to type-check if a kind is not listed above
+      void _exhaustive;
+      return 'drop'; // runtime fallback for a kind this switch does not know
+    }
+  }
+}
+
+function extractText(content: readonly ContentPart[]): string {
+  let text = ''; // text parts concatenated in order, with no separator
+  for (const part of content) {
+    if (part.type === 'text') {
+      text += part.text;
+    }
+  }
+  return text; // '' when there are no text parts
+}
+
+export function isCompactionSummaryMessage(message: MessageLike): boolean {
+  return message.origin?.kind === 'compaction_summary'; // false when origin is absent; role is not checked
+}
+
+/**
+ * True when `message` is user-role and `compactionUserMessageDisposition`
+ * classifies its origin as 'keep'; any other role is rejected up front.
+ */
+export function isRealUserInput(message: MessageLike): boolean {
+  if (message.role !== 'user') return false;
+  return compactionUserMessageDisposition(message.origin) === 'keep';
+}
+
+export function collectCompactableUserMessages<T extends MessageLike>(messages: readonly T[]): T[] {
+  // Order-preserving filter: real user input only, with compaction summaries explicitly excluded.
+  const isCompactable = (m: T): boolean => isRealUserInput(m) && !isCompactionSummaryMessage(m);
+  return messages.filter(isCompactable);
+}
+
+function truncateTextToTokens(text: string, maxTokens: number): string {
+  if (maxTokens <= 0) return ''; // a non-positive budget keeps nothing
+  // Single pass: walk the string once, mirroring estimateTokens' heuristic
+  // (ASCII ~4 chars/token, non-ASCII ~1 char/token) and stop at the first
+  // code point that would push the running total over the budget. This keeps
+  // CJK-heavy inputs from the O(n^2) cost of re-estimating shrinking prefixes.
+  let asciiCount = 0;
+  let nonAsciiCount = 0;
+  let end = 0; // UTF-16 offset just past the last code point that still fits
+  for (const char of text) {
+    if (char.codePointAt(0)! <= 127) {
+      asciiCount++;
+    } else {
+      nonAsciiCount++;
+    }
+    if (Math.ceil(asciiCount / 4) + nonAsciiCount > maxTokens) break; // this code point would exceed the budget
+    end += char.length; // for...of yields whole code points (length 1 or 2), so a surrogate pair is never split
+  }
+  return text.slice(0, end);
+}
+
+function truncateUserMessage<T extends MessageLike>(message: T, maxTokens: number): T {
+  // Only text survives: the text parts are joined, cut to `maxTokens`, and stored
+  // as a single text part, so any non-text part of the original is discarded.
+  const truncatedText = truncateTextToTokens(extractText(message.content), maxTokens);
+  // The spread copies every other field (notably `origin`); `toolCalls` is reset
+  // to []. TypeScript cannot prove the result is still `T`, hence the double cast.
+  const rebuilt = {
+    ...message,
+    content: [{ type: 'text', text: truncatedText }],
+    toolCalls: [],
+  };
+  return rebuilt as unknown as T;
+}
+
+/**
+ * Keep the most recent user messages whose cumulative estimated size fits
+ * `maxTokens`, returned oldest-first. The first message that does not fit is
+ * cut down to the remaining budget (text only) and ends the walk; it is left
+ * out entirely when no text survives, so no empty message enters the context.
+ */
+export function selectRecentUserMessages<T extends MessageLike>(
+  messages: readonly T[],
+  maxTokens: number = COMPACT_USER_MESSAGE_MAX_TOKENS,
+): T[] {
+  const selected: T[] = [];
+  let remaining = maxTokens;
+  for (let i = messages.length - 1; i >= 0 && remaining > 0; i--) {
+    const message = messages[i]!;
+    const tokens = estimateTokensForMessage(message);
+    if (tokens > remaining) {
+      const truncated = truncateUserMessage(message, remaining);
+      if (extractText(truncated.content).length > 0) selected.push(truncated);
+      break;
+    }
+    selected.push(message);
+    remaining -= tokens;
+  }
+  return selected.reverse();
+}
+
+export function buildCompactionSummaryText(summary: string): string {
+  const body = summary.trim() || '(no summary available)'; // blank summaries get the placeholder
+  return `${COMPACTION_SUMMARY_PREFIX}\n${body}`;
+}
diff --git a/packages/agent-core/src/agent/compaction/strategy.ts b/packages/agent-core/src/agent/compaction/strategy.ts
index edf9132e0..d409d6e8d 100644
--- a/packages/agent-core/src/agent/compaction/strategy.ts
+++ b/packages/agent-core/src/agent/compaction/strategy.ts
@@ -1,43 +1,48 @@
-import type { Message } from "@moonshot-ai/kosong";
-import { estimateTokensForMessage } from "../../utils/tokens";
-import type { CompactionSource } from "./types";
+import type { CompactionSource } from './types';
 
 export interface CompactionConfig {
+  /** Fraction of the model context window that triggers auto-compaction. */
   triggerRatio: number;
+  /** Fraction of the model context window that blocks the turn on compaction. */
   blockRatio: number;
+  /** Reserved output budget; compaction triggers early to leave this much room. */
   reservedContextSize: number;
+  /** Maximum number of auto-compactions allowed in a single turn. */
   maxCompactionPerTurn: number;
-  maxRecentMessages: number;
-  maxRecentUserMessages: number;
-  maxRecentSizeRatio: number;
-  minOverflowReductionRatio: number;
+  /**
+   * Consecutive provider-overflow recoveries (overflow -> compact -> overflow
+   * again) allowed in a single turn before giving up. Caps the loop when
+   * compaction can no longer shrink the request below the model window.
+   */
+  maxOverflowCompactionAttempts: number;
 }
 
+/**
+ * Auto-compact at 85% of the resolved context window. `blockRatio` matches
+ * `triggerRatio` so compaction runs synchronously with no background
+ * compaction.
+ */
 export const DEFAULT_COMPACTION_CONFIG: CompactionConfig = {
   triggerRatio: 0.85,
-  blockRatio: 0.85, // Same as triggerRatio to disable async compaction
+  blockRatio: 0.85,
   reservedContextSize: 50_000,
   maxCompactionPerTurn: Infinity,
-  maxRecentMessages: 4,
-  maxRecentUserMessages: Infinity,
-  maxRecentSizeRatio: 0.2,
-  minOverflowReductionRatio: 0.05,
+  maxOverflowCompactionAttempts: 3,
 };
 
 export interface CompactionStrategy {
   shouldCompact(usedSize: number): boolean;
   shouldBlock(usedSize: number): boolean;
-  computeCompactCount(messages: readonly Message[], source: CompactionSource): number;
-  reduceCompactOnOverflow(messages: readonly Message[]): number;
   readonly checkAfterStep: boolean;
   readonly maxCompactionPerTurn: number;
+  readonly maxOverflowCompactionAttempts: number;
 }
 
 export class DefaultCompactionStrategy implements CompactionStrategy {
   constructor(
     protected readonly maxSizeProvider: () => number,
-    protected readonly config: CompactionConfig = DEFAULT_COMPACTION_CONFIG
-  ) { }
+    protected readonly config: CompactionConfig = DEFAULT_COMPACTION_CONFIG,
+  ) {}
 
   protected get maxSize(): number {
     return this.maxSizeProvider();
@@ -64,111 +69,6 @@ export class DefaultCompactionStrategy implements CompactionStrategy {
     return reservedSize > 0 && reservedSize < this.maxSize && usedSize + reservedSize >= this.maxSize;
   }
 
-  computeCompactCount(messages: readonly Message[], source: CompactionSource): number {
-    // Return value: N messages to be compacted (0 means no compaction possible)
-    // LLM Input: messages.slice(0, N) + [user:instruction]
-    // Preserved recent messages: messages.slice(N)
-
-    // Manual compaction
-    if (source === 'manual') {
-      for (let i = messages.length - 1; i > 0; i--) {
-        if (canSplitAfter(messages, i)) {
-          return this.fitCompactCountToWindow(messages, i + 1);
-        }
-      }
-      return 0;
-    }
-
-    // Auto compaction rules (in order of precedence):
-    // 1. The split after messages[N-1] must be safe per `canSplitAfter`:
-    //    messages[N-1] is not a user or asst-with-tool-calls, and the retained
-    //    suffix messages.slice(N) has no orphan tool result.
-    // 2. At least one recent message must be preserved
-    // 3. At most maxRecentMessages recent messages should be preserved
-    // 4. At most maxRecentUserMessages recent user messages should be preserved
-    // 5. At most maxRecentSizeRatio * maxSize recent messages should be preserved
-    // 6. N should be as small as possible
-
-    let recentMessages = 1;
-    let recentUserMessages = 0;
-    let recentSize = 0;
-    let bestN: number | undefined;
-
-    for (; recentMessages < messages.length; recentMessages++) {
-      const splitIndex = messages.length - recentMessages - 1;
-      const m2 = messages[messages.length - recentMessages]!;
-
-      if (m2.role === 'user') {
-        recentUserMessages++;
-      }
-      recentSize += estimateTokensForMessage(m2);
-
-      if (canSplitAfter(messages, splitIndex)) {
-        bestN = splitIndex + 1;
-      }
-
-      const reachesMax = recentMessages >= this.config.maxRecentMessages
-        || recentUserMessages >= this.config.maxRecentUserMessages
-        || recentSize >= this.maxSize * this.config.maxRecentSizeRatio;
-      if (reachesMax && bestN !== undefined) {
-        break;
-      }
-    }
-
-    return this.fitCompactCountToWindow(messages, bestN ?? 0);
-  }
-
-  reduceCompactOnOverflow(messages: readonly Message[]): number {
-    const minReducedSize = Math.max(
-      1,
-      Math.ceil(this.maxSize * this.config.minOverflowReductionRatio),
-    );
-    let reducedSize = 0;
-    let bestN: number | undefined;
-
-    for (let i = messages.length - 2; i > 0; i--) {
-      reducedSize += estimateTokensForMessage(messages[i + 1]!);
-      if (canSplitAfter(messages, i)) {
-        bestN = i + 1;
-        if (reducedSize >= minReducedSize) {
-          return i + 1;
-        }
-      }
-    }
-    return bestN ?? messages.length;
-  }
-
-  private fitCompactCountToWindow(
-    messages: readonly Message[],
-    compactedCount: number,
-  ): number {
-    if (this.maxSize <= 0 || compactedCount <= 0) {
-      return compactedCount;
-    }
-
-    let compactedSize = 0;
-    for (let i = 0; i < compactedCount; i++) {
-      compactedSize += estimateTokensForMessage(messages[i]!);
-    }
-    if (compactedSize <= this.maxSize) {
-      return compactedCount;
-    }
-
-    let bestN: number | undefined;
-    for (let n = compactedCount - 1; n > 0; n--) {
-      compactedSize -= estimateTokensForMessage(messages[n]!);
-      if (!canSplitAfter(messages, n - 1)) {
-        continue;
-      }
-      bestN = n;
-      if (compactedSize <= this.maxSize) {
-        return n;
-      }
-    }
-
-    return bestN ?? compactedCount;
-  }
-
   get checkAfterStep(): boolean {
     return this.config.triggerRatio !== this.config.blockRatio;
   }
@@ -176,45 +76,10 @@ export class DefaultCompactionStrategy implements CompactionStrategy {
   get maxCompactionPerTurn(): number {
     return this.config.maxCompactionPerTurn;
   }
-}
 
-/**
- * Decide whether a compaction split is safe to place immediately after
- * `messages[index]`. A split is safe only when:
- *   - `messages[index]` itself is not a user message or an assistant message
- *     with pending tool calls (cutting either of those off from what follows
- *     would break the conversation), AND
- *   - the next message is not a tool result. The history is well-formed:
- *     tool results only appear after their owning `asst_w_tc` and all tool
- *     results for one exchange land consecutively before the next non-tool
- *     message. So if the suffix starts with a tool result, its `asst_w_tc`
- *     must be in the compacted prefix, which would orphan that result
- *     (e.g. splitting between tool_a and tool_b of a parallel call), AND
- *   - the compacted prefix itself does not end with an unresolved tool
- *     exchange, because pending tool results must remain in the retained tail.
- */
-function canSplitAfter(messages: readonly Message[], index: number): boolean {
-  const m = messages[index];
-  if (m === undefined) return false;
-  if (m.role === 'user') return false;
-  if (m.role === 'assistant' && m.toolCalls.length > 0) return false;
-  if (messages[index + 1]?.role === 'tool') return false;
-  if (prefixEndsWithOpenToolExchange(messages, index)) return false;
-  return true;
-}
-
-function prefixEndsWithOpenToolExchange(messages: readonly Message[], index: number): boolean {
-  if (messages[index]?.role !== 'tool') return false;
-
-  let toolResultCount = 0;
-  for (let i = index; i >= 0; i--) {
-    const message = messages[i];
-    if (message === undefined) return false;
-    if (message.role === 'tool') {
-      toolResultCount++;
-      continue;
-    }
-    return message.role === 'assistant' && message.toolCalls.length > toolResultCount;
+  get maxOverflowCompactionAttempts(): number {
+    return this.config.maxOverflowCompactionAttempts; // pass-through of the configured cap
   }
-  return false;
 }
+
+export type { CompactionSource };
diff --git a/packages/agent-core/src/agent/compaction/types.ts b/packages/agent-core/src/agent/compaction/types.ts
index 820365cdc..cef3c5308 100644
--- a/packages/agent-core/src/agent/compaction/types.ts
+++ b/packages/agent-core/src/agent/compaction/types.ts
@@ -1,10 +1,46 @@
 export interface CompactionResult {
+  /** Summary text from the compaction model, plus the appended todo list when one exists. */
   summary: string;
+  /**
+   * Exact summary message stored in the live model context. It includes the
+   * compaction prefix that tells the next model this is handoff context rather
+   * than a real user prompt. Optional for backward compatibility with older
+   * wire records, where `summary` was also the model-context text.
+   */
+  contextSummary?: string;
   compactedCount: number;
   tokensBefore: number;
   tokensAfter: number;
+  /**
+   * Number of real user messages kept ahead of the summary in the
+   * post-compaction live context (verbatim, except that the oldest one may be
+   * truncated to fit the token budget). Written by
+   * `ContextMemory.applyCompaction` so the wire-transcript reducer can
+   * reproduce the live folded length without re-deriving it from the full
+   * transcript. Optional for backward compatibility with older wire records.
+   */
+  keptUserMessageCount?: number;
+  /**
+   * Number of oldest messages trimmed from the summarizer input when the
+   * compaction request itself overflowed the model window. These messages are
+   * not covered by the produced summary — a real-user message among them may
+   * still be retained verbatim in the live context via `keptUserMessageCount`,
+   * but assistant/tool messages are lost. Surfacing the count lets records and
+   * telemetry report the summary's blind spot honestly. Optional for backward
+   * compatibility with older wire records.
+   */
+  droppedCount?: number;
 }
 
+/**
+ * Inputs `ContextMemory.applyCompaction` needs to derive a `CompactionResult`.
+ * `contextSummary`, `tokensAfter`, `keptUserMessageCount` and `droppedCount` are
+ * optional: the live path fills in what it knows, while restore passes the
+ * persisted record so its historical values are preserved verbatim.
+ */
+export type CompactionInput = Pick<CompactionResult, 'summary' | 'compactedCount' | 'tokensBefore'> &
+  Partial<Pick<CompactionResult, 'contextSummary' | 'tokensAfter' | 'keptUserMessageCount' | 'droppedCount'>>;
+
 export type CompactionSource = 'manual' | 'auto';
 
 export interface CompactionBeginData {
diff --git a/packages/agent-core/src/agent/context/index.ts b/packages/agent-core/src/agent/context/index.ts
index cf4c88395..eab7337e0 100644
--- a/packages/agent-core/src/agent/context/index.ts
+++ b/packages/agent-core/src/agent/context/index.ts
@@ -3,9 +3,16 @@ import { createToolMessage, type ContentPart, type Message } from '@moonshot-ai/
 import type { Agent } from '..';
 import { ErrorCodes, KimiError } from '../../errors';
 import type { ExecutableToolResult, LoopRecordedEvent } from '../../loop';
-import { estimateTokensForMessages } from '../../utils/tokens';
+import { estimateTokens, estimateTokensForMessages } from '../../utils/tokens';
 import { escapeXml } from '../../utils/xml-escape';
-import type { CompactionResult } from '../compaction';
+import {
+  COMPACT_USER_MESSAGE_MAX_TOKENS,
+  collectCompactableUserMessages,
+  isRealUserInput,
+  selectRecentUserMessages,
+  type CompactionInput,
+  type CompactionResult,
+} from '../compaction';
 import { project, trimTrailingOpenToolExchange } from './projector';
 import {
   USER_PROMPT_ORIGIN,
@@ -172,7 +179,7 @@ export class ContextMemory {
         this._tokenCount -= estimateTokensForMessages([message]);
       }
 
-      if (isRealUserPrompt(message)) {
+      if (isRealUserInput(message)) {
         removedUserCount++;
         if (removedUserCount >= count) break;
       }
@@ -205,7 +212,36 @@ export class ContextMemory {
     }
   }
 
-  applyCompaction(result: CompactionResult): void {
+  applyCompaction(input: CompactionInput): CompactionResult {
+    // Single derivation point for the post-compaction shape: the most recent
+    // real user messages (verbatim, within the token budget) followed by a
+    // user-role summary. `tokensAfter` and `keptUserMessageCount` are derived
+    // here from the actual `_history` so the live context, the wire record,
+    // and the transcript reducer all agree — re-deriving them elsewhere (e.g.
+    // from the full transcript, which still holds the untruncated originals of
+    // messages the live context truncated) would diverge.
+    const keptUserMessages = selectRecentUserMessages(
+      collectCompactableUserMessages(this._history),
+      COMPACT_USER_MESSAGE_MAX_TOKENS,
+    );
+    // Live compaction omits `tokensAfter` / `keptUserMessageCount`, so both are
+    // derived from the actual `_history`; restore passes the persisted record so
+    // its historical values are preserved verbatim. Older wire records did not
+    // have `contextSummary`, so on restore their `summary` stays the model-context text.
+    const contextSummary = input.contextSummary ?? input.summary;
+    const tokensAfter =
+      input.tokensAfter ??
+      estimateTokens(contextSummary) + estimateTokensForMessages(keptUserMessages);
+    const keptUserMessageCount = input.keptUserMessageCount ?? keptUserMessages.length;
+    const result: CompactionResult = {
+      summary: input.summary,
+      contextSummary,
+      compactedCount: input.compactedCount,
+      tokensBefore: input.tokensBefore,
+      tokensAfter,
+      keptUserMessageCount,
+      droppedCount: input.droppedCount,
+    };
     this.agent.records.logRecord({
       type: 'context.apply_compaction',
       ...result,
@@ -213,27 +249,34 @@ export class ContextMemory {
     this.agent.replayBuilder.patchLast('compaction', {
       result: {
         summary: result.summary,
+        contextSummary: result.contextSummary,
         compactedCount: result.compactedCount,
         tokensBefore: result.tokensBefore,
         tokensAfter: result.tokensAfter,
+        keptUserMessageCount: result.keptUserMessageCount,
+        droppedCount: result.droppedCount,
       },
     });
     this._history = [
+      ...keptUserMessages,
       {
-        role: 'assistant',
-        content: [{ type: 'text', text: result.summary }],
+        role: 'user',
+        content: [{ type: 'text', text: contextSummary }],
         toolCalls: [],
         origin: { kind: 'compaction_summary' },
       },
-      ...this._history.slice(result.compactedCount),
     ];
     this.openSteps.clear();
-    this.flushDeferredMessagesIfToolExchangeClosed();
+    this.pendingToolResultIds.clear();
+    // Drop deferred messages (mostly injections/system reminders) instead of
+    // flushing them: initial context is rebuilt every turn.
+    this.deferredMessages = [];
     this._tokenCount = result.tokensAfter;
     this.tokenCountCoveredMessageCount = this._history.length;
     this.agent.microCompaction.reset();
-    this.agent.injection.onContextCompacted(result.compactedCount);
+    this.agent.injection.onContextCompacted();
     this.agent.emitStatusUpdated();
+    return result;
   }
 
   data(): AgentContextData {
@@ -461,16 +504,6 @@ function isEmptyOutputText(output: string): boolean {
   return output.length === 0 || output.trim() === TOOL_OUTPUT_EMPTY_TEXT;
 }
 
-function isRealUserPrompt(message: ContextMessage): boolean {
-  if (message.role !== 'user') return false;
-  const origin = message.origin;
-  if (origin === undefined || origin.kind === 'user') return true;
-  if (origin.kind === 'skill_activation') {
-    return origin.trigger === 'user-slash';
-  }
-  return false;
-}
-
 function formatUndoUnavailableMessage(
   requestedCount: number,
   undoableCount: number,
diff --git a/packages/agent-core/src/agent/index.ts b/packages/agent-core/src/agent/index.ts
index 4e733a80c..e17b98c8b 100644
--- a/packages/agent-core/src/agent/index.ts
+++ b/packages/agent-core/src/agent/index.ts
@@ -11,7 +11,11 @@ import type { EnabledPluginSessionStart } from '#/plugin';
 
 import type { McpConnectionManager } from '../mcp';
 import { FlagResolver, type ExperimentalFlagResolver } from '../flags';
-import type { PreparedSystemPromptContext, ResolvedAgentProfile } from '../profile';
+import {
+  prepareSystemPromptContext,
+  type PreparedSystemPromptContext,
+  type ResolvedAgentProfile,
+} from '../profile';
 import type { ModelProvider } from '../session/provider-manager';
 import type { SessionSubagentHost } from '../session/subagent-host';
 import { noopTelemetryClient, type TelemetryClient } from '../telemetry';
@@ -82,6 +86,7 @@ export interface AgentOptions {
   readonly experimentalFlags?: ExperimentalFlagResolver;
   readonly replay?: ReplayBuilderOptions;
   readonly additionalDirs?: readonly string[];
+  readonly systemPromptContextProvider?: (() => Promise<PreparedSystemPromptContext>) | undefined;
 }
 
 export class Agent {
@@ -127,6 +132,9 @@ export class Agent {
   readonly replayBuilder: ReplayBuilder;
 
   private additionalDirs: readonly string[];
+  private activeProfile?: ResolvedAgentProfile;
+  private brandHome?: string;
+  private readonly systemPromptContextProvider?: (() => Promise<PreparedSystemPromptContext>) | undefined;
 
   constructor(options: AgentOptions) {
     this.type = options.type ?? 'main';
@@ -145,6 +153,7 @@ export class Agent {
     this.telemetry = options.telemetry ?? noopTelemetryClient;
     this.experimentalFlags = options.experimentalFlags ?? new FlagResolver();
     this.additionalDirs = normalizeAdditionalDirs(options.additionalDirs ?? []);
+    this.systemPromptContextProvider = options.systemPromptContextProvider;
 
     this.llmRequestLogger = new LlmRequestLogger(this.log);
     this.blobStore = options.homedir
@@ -248,7 +257,41 @@ export class Agent {
     });
   }
 
-  useProfile(profile: ResolvedAgentProfile, context?: PreparedSystemPromptContext): void {
+  useProfile(
+    profile: ResolvedAgentProfile,
+    context?: PreparedSystemPromptContext,
+    brandHome?: string,
+  ): void {
+    this.setActiveProfile(profile, brandHome);
+    this.updateSystemPromptFromProfile(profile, context);
+    this.tools.setActiveTools(profile.tools);
+  }
+
+  setActiveProfile(profile: ResolvedAgentProfile, brandHome?: string): void {
+    this.activeProfile = profile;
+    this.brandHome = brandHome;
+  }
+
+  /**
+   * Re-render the system prompt with freshly gathered runtime context (cwd
+   * listing, AGENTS.md, additional-dirs info, skill list). Called after
+   * compaction so the post-compaction turns do not keep a snapshot captured
+   * at session bootstrap. Invalidates the prompt-cache prefix by design.
+   */
+  async refreshSystemPrompt(): Promise<void> {
+    if (this.activeProfile === undefined) return;
+    const context = this.systemPromptContextProvider === undefined
+      ? await prepareSystemPromptContext(this.kaos, this.brandHome, {
+          additionalDirs: this.additionalDirs,
+        })
+      : await this.systemPromptContextProvider();
+    this.updateSystemPromptFromProfile(this.activeProfile, context);
+  }
+
+  private updateSystemPromptFromProfile(
+    profile: ResolvedAgentProfile,
+    context?: PreparedSystemPromptContext,
+  ): void {
     const systemPrompt = profile.systemPrompt({
       osEnv: this.kaos.osEnv,
       cwd: this.config.cwd,
@@ -258,7 +301,6 @@ export class Agent {
       additionalDirsInfo: context?.additionalDirsInfo,
     });
     this.config.update({ profileName: profile.name, systemPrompt });
-    this.tools.setActiveTools(profile.tools);
   }
 
   async resume(options?: AgentRecordsReplayOptions): Promise<{ warning?: string }> {
diff --git a/packages/agent-core/src/agent/injection/injector.ts b/packages/agent-core/src/agent/injection/injector.ts
index 504e412de..d13e18159 100644
--- a/packages/agent-core/src/agent/injection/injector.ts
+++ b/packages/agent-core/src/agent/injection/injector.ts
@@ -9,11 +9,8 @@ export abstract class DynamicInjector {
     this.injectedAt = null;
   }
 
-  onContextCompacted(compactedCount: number): void {
-    if (this.injectedAt !== null) {
-      const newInjectedAt = this.injectedAt - compactedCount + 1;
-      this.injectedAt = newInjectedAt >= 0 ? newInjectedAt : null;
-    }
+  onContextCompacted(): void {
+    this.injectedAt = null;
   }
 
   onContextMessageRemoved(index: number): void {
diff --git a/packages/agent-core/src/agent/injection/manager.ts b/packages/agent-core/src/agent/injection/manager.ts
index 99c9cd07e..7103f2cdd 100644
--- a/packages/agent-core/src/agent/injection/manager.ts
+++ b/packages/agent-core/src/agent/injection/manager.ts
@@ -40,16 +40,21 @@ export class InjectionManager {
     await this.activeGoalInjector()?.inject();
   }
 
+  async injectAfterCompaction(): Promise<void> {
+    await this.injectGoal();
+    await this.inject();
+  }
+
   onContextClear(): void {
     for (const injector of this.lifecycleInjectors()) {
       injector.onContextClear();
     }
   }
 
-  onContextCompacted(compactedCount: number): void {
+  onContextCompacted(): void {
     for (const injector of this.lifecycleInjectors()) {
       try {
-        injector.onContextCompacted(compactedCount);
+        injector.onContextCompacted();
       } catch {
         continue;
       }
diff --git a/packages/agent-core/src/agent/injection/permission-mode.ts b/packages/agent-core/src/agent/injection/permission-mode.ts
index 638ed6760..ffe5389ad 100644
--- a/packages/agent-core/src/agent/injection/permission-mode.ts
+++ b/packages/agent-core/src/agent/injection/permission-mode.ts
@@ -15,13 +15,20 @@ const AUTO_MODE_EXIT_REMINDER = [
 export class PermissionModeInjector extends DynamicInjector {
   protected override readonly injectionVariant = 'permission_mode';
   private lastMode: PermissionMode | undefined;
+  private refreshAfterCompaction = false;
+
+  override onContextCompacted(): void {
+    super.onContextCompacted(); // delegate the `injectedAt` reset to DynamicInjector so both stay in sync
+    this.refreshAfterCompaction = true; // lets the next getInjection() bypass its unchanged-mode early return once
+  }
 
   getInjection(): string | undefined {
     const mode = this.agent.permission.mode;
     const previousMode = this.lastMode;
 
-    if (mode === previousMode) return undefined;
+    if (!this.refreshAfterCompaction && mode === previousMode) return undefined;
 
+    this.refreshAfterCompaction = false;
     this.lastMode = mode;
     if (mode === 'auto') return AUTO_MODE_ENTER_REMINDER;
     if (previousMode === 'auto') return AUTO_MODE_EXIT_REMINDER;
diff --git a/packages/agent-core/src/agent/turn/index.ts b/packages/agent-core/src/agent/turn/index.ts
index df115b6d5..d2fb3c8c5 100644
--- a/packages/agent-core/src/agent/turn/index.ts
+++ b/packages/agent-core/src/agent/turn/index.ts
@@ -662,9 +662,15 @@ export class TurnFlow {
           },
           hooks: {
             beforeStep: async ({ signal: stepSignal }) => {
-              this.flushSteerBuffer();
               this.agent.microCompaction.detect();
               await this.agent.fullCompaction.beforeStep(stepSignal);
+              // Flush steered messages (background-task / cron notifications,
+              // user interrupts) AFTER compaction so they land in the
+              // post-compaction context instead of being dropped by it. The
+              // keep/drop decision lives in
+              // `compactionUserMessageDisposition()`; these origins are not
+              // re-injected later, so append them only after compaction runs.
+              this.flushSteerBuffer();
               await this.agent.injection.inject();
               deduper.beginStep();
               return;
diff --git a/packages/agent-core/src/index.ts b/packages/agent-core/src/index.ts
index 14dcec22a..ae63a8604 100644
--- a/packages/agent-core/src/index.ts
+++ b/packages/agent-core/src/index.ts
@@ -62,6 +62,12 @@ export type {
 export { AGENT_WIRE_PROTOCOL_VERSION } from './agent/records';
 export type { AgentConfigUpdateData } from './agent/config';
 export type { CompactionBeginData, CompactionResult } from './agent/compaction';
+export {
+  COMPACT_USER_MESSAGE_MAX_TOKENS,
+  collectCompactableUserMessages,
+  isRealUserInput,
+  selectRecentUserMessages,
+} from './agent/compaction';
 export type {
   PermissionApprovalResultRecord,
   PermissionMode,
diff --git a/packages/agent-core/src/services/message/transcript.ts b/packages/agent-core/src/services/message/transcript.ts
index e98bed516..a5ecfb7f1 100644
--- a/packages/agent-core/src/services/message/transcript.ts
+++ b/packages/agent-core/src/services/message/transcript.ts
@@ -3,8 +3,10 @@
  * agent from its `wire.jsonl` record log.
  *
  * Why: `ContextMemory.applyCompaction` rewrites the in-memory history as
- * `[compaction_summary, ...tail]`, so `getContext().history` only reflects the
- * model's CURRENT context. The wire log, however, keeps every record. The TUI
+ * `[...keptUserMessages, compaction_summary]` (the most recent real user
+ * prompts, verbatim within a token budget, followed by a single user-role
+ * summary), so `getContext().history` only reflects the model's CURRENT
+ * context. The wire log, however, keeps every record. The TUI
  * shows the full transcript on resume because `ReplayBuilder` captures every
  * `pushHistory` during record replay and is never folded by compaction. This
  * module reproduces that exact view for daemon REST consumers (web), without
@@ -19,8 +21,11 @@
  *                                     open assistant message; tool.result appends a
  *                                     tool message with the same `<system>` status
  *                                     wrapping as `toolResultOutputForModel`
- *   - `context.apply_compaction`    → keep the prefix, insert the summary message
- *                                     at the fold point (origin `compaction_summary`)
+ *   - `context.apply_compaction`    → keep the full history, append the
+ *                                     user-role summary marker (origin
+ *                                     `compaction_summary`), and recover
+ *                                     `foldedLength` from the recorded
+ *                                     `keptUserMessageCount`
  *   - `context.undo`                → remove tail messages exactly like
  *                                     `ContextMemory.undo` (skip injections, stop at
  *                                     compaction summaries / `context.clear` floors)
@@ -45,6 +50,12 @@ import path from 'node:path';
 import type { AgentRecord } from '../../agent/records';
 import type { ContextMessage } from '../../agent/context';
 import type { ExecutableToolResult, LoopRecordedEvent } from '../../loop';
+import {
+  COMPACT_USER_MESSAGE_MAX_TOKENS,
+  collectCompactableUserMessages,
+  isRealUserInput,
+  selectRecentUserMessages,
+} from '../../agent/compaction';
 
 type ContentPart = ContextMessage['content'][number];
 
@@ -212,7 +223,7 @@ export function reduceWireRecords(records: Iterable<AgentRecord>): {
       if (message.origin?.kind === 'compaction_summary') break;
       transcript.splice(i, 1);
       foldedLength = Math.max(0, foldedLength - 1);
-      if (isRealUserPrompt(message)) {
+      if (isRealUserInput(message)) {
         removedUserCount++;
         if (removedUserCount >= count) break;
       }
@@ -238,22 +249,40 @@ export function reduceWireRecords(records: Iterable<AgentRecord>): {
         applyLoopEvent(record.event, record.time);
         break;
       case 'context.apply_compaction': {
-        // ContextMemory drops history[0..compactedCount] and prepends the
-        // summary; we keep the prefix and insert the summary at the fold
-        // point so the transcript shows both.
-        const tailLength = Math.max(0, foldedLength - record.compactedCount);
-        transcript.splice(Math.max(0, transcript.length - tailLength), 0, {
+        // Mirrors ContextMemory.applyCompaction: the live context becomes the
+        // most recent user messages followed by a user-role summary. The
+        // transcript keeps the full history and appends the summary marker;
+        // foldedLength tracks the post-compaction live context length.
+        transcript.push({
           message: {
-            role: 'assistant',
+            role: 'user',
             content: [{ type: 'text', text: record.summary }],
             toolCalls: [],
             origin: { kind: 'compaction_summary' },
           },
           time: record.time,
         });
-        foldedLength = tailLength + 1;
-        openSteps.clear();
-        flushDeferredIfToolExchangeClosed();
+        // Prefer the kept-user count recorded by the live
+        // ContextMemory.applyCompaction. Re-deriving it from the full
+        // transcript would diverge from the live context: the transcript still
+        // holds the untruncated originals of messages the live context may
+        // have truncated, and (after a clear) messages the live context no
+        // longer has. Only fall back to re-deriving for legacy wire records
+        // that predate the field.
+        if (record.keptUserMessageCount !== undefined) {
+          foldedLength = record.keptUserMessageCount + 1;
+        } else {
+          const keptUserMessages = selectRecentUserMessages(
+            collectCompactableUserMessages(transcript.map((entry) => entry.message)),
+            COMPACT_USER_MESSAGE_MAX_TOKENS,
+          );
+          foldedLength = keptUserMessages.length + 1;
+        }
+        // Drop any open tool exchange and deferred messages exactly like
+        // ContextMemory.applyCompaction: late tool results become orphans and
+        // deferred injections are not rebuilt, so pending ids must not strand
+        // later appends in `deferred`.
+        resetOpenState();
         break;
       }
       case 'context.undo':
@@ -272,17 +301,6 @@ export function reduceWireRecords(records: Iterable<AgentRecord>): {
   return { entries: transcript as TranscriptEntry[], foldedLength };
 }
 
-/** Mirrors agent-core's `isRealUserPrompt` (context undo accounting). */
-function isRealUserPrompt(message: MutableMessage): boolean {
-  if (message.role !== 'user') return false;
-  const origin = message.origin;
-  if (origin === undefined || origin.kind === 'user') return true;
-  if (origin.kind === 'skill_activation') {
-    return origin.trigger === 'user-slash';
-  }
-  return false;
-}
-
 /** Mirrors agent-core's `toolResultOutputForModel` + `createToolMessage`. */
 function toolResultContent(result: ExecutableToolResult): ContentPart[] {
   const output = result.output;
diff --git a/packages/agent-core/src/services/session/sessionService.ts b/packages/agent-core/src/services/session/sessionService.ts
index 3b684a0dc..18e0370b4 100644
--- a/packages/agent-core/src/services/session/sessionService.ts
+++ b/packages/agent-core/src/services/session/sessionService.ts
@@ -1,6 +1,7 @@
 import { Disposable, IInstantiationService, InstantiationType, registerSingleton } from '../../di';
 import { Emitter } from '../../base/common/event';
 import { ErrorCodes, KimiError } from '../../errors';
+import { isRealUserInput } from '../../agent/compaction';
 import type { AgentContextData, ContextMessage } from '../../agent/context';
 import type { JsonObject, ListSessionsPayload, SessionSummary } from '../../rpc';
 import type { SessionMeta } from '../../session';
@@ -59,7 +60,7 @@ function canUndoHistory(history: readonly ContextMessage[], count: number): bool
     if (message === undefined) continue;
     if (message.origin?.kind === 'injection') continue;
     if (message.origin?.kind === 'compaction_summary') return false;
-    if (isRealUserPrompt(message)) {
+    if (isRealUserInput(message)) {
       found++;
       if (found >= count) return true;
     }
@@ -67,13 +68,6 @@ function canUndoHistory(history: readonly ContextMessage[], count: number): bool
   return false;
 }
 
-function isRealUserPrompt(message: ContextMessage): boolean {
-  if (message.role !== 'user') return false;
-  const origin = message.origin;
-  if (origin === undefined || origin.kind === 'user') return true;
-  return origin.kind === 'skill_activation' && origin.trigger === 'user-slash';
-}
-
 function pageContextMessages(
   sessionId: string,
   sessionCreatedAtMs: number,
diff --git a/packages/agent-core/src/session/index.ts b/packages/agent-core/src/session/index.ts
index a2bb022b5..c49de7f9e 100644
--- a/packages/agent-core/src/session/index.ts
+++ b/packages/agent-core/src/session/index.ts
@@ -470,7 +470,7 @@ export class Session {
       this.options.kimiHomeDir,
       { additionalDirs: this.additionalDirs },
     );
-    agent.useProfile(profile, context);
+    agent.useProfile(profile, context, this.options.kimiHomeDir);
     const { agentsMdWarning } = context;
     if (agentsMdWarning !== undefined) {
       this.agentsMdWarning = agentsMdWarning;
@@ -718,7 +718,8 @@ export class Session {
   ): Agent {
     const parentAgent = parentAgentId !== null ? this.getReadyAgent(parentAgentId) : undefined;
     const cwd = parentAgent?.config.cwd ?? this.toolKaos.getcwd();
-    return new Agent({
+    let agent!: Agent;
+    agent = new Agent({
       ...config,
       type,
       kaos: this.toolKaos.withCwd(cwd),
@@ -737,7 +738,14 @@ export class Session {
       pluginSessionStarts: type === 'main' ? this.options.pluginSessionStarts : undefined,
       experimentalFlags: this.experimentalFlags,
       additionalDirs: parentAgent?.getAdditionalDirs() ?? this.additionalDirs,
+      systemPromptContextProvider: () =>
+        prepareSystemPromptContext(
+          this.systemContextKaos(agent.kaos.getcwd()),
+          this.options.kimiHomeDir,
+          { additionalDirs: agent.getAdditionalDirs() },
+        ),
     });
+    return agent;
   }
 
   private permissionOptions(
@@ -810,6 +818,7 @@ export class Session {
     try {
       const agent = this.instantiateAgent(id, meta.homedir, meta.type, {}, parentAgentId);
       const result = await agent.resume();
+      this.restoreAgentProfileHandle(agent, meta, parent?.agent);
       this.agents.set(id, agent);
       return { agent, warning: parent?.warning ?? result.warning };
     } catch (error) {
@@ -821,6 +830,34 @@ export class Session {
     }
   }
 
+  private restoreAgentProfileHandle(
+    agent: Agent,
+    meta: AgentMeta,
+    parentAgent: Agent | undefined,
+  ): void {
+    if (agent.config.systemPrompt === '') return;
+    const profile = this.resolvePersistedProfile(agent, meta, parentAgent);
+    if (profile === undefined) return;
+    agent.setActiveProfile(profile, this.options.kimiHomeDir);
+  }
+
+  private resolvePersistedProfile(
+    agent: Agent,
+    meta: AgentMeta,
+    parentAgent: Agent | undefined,
+  ): ResolvedAgentProfile | undefined {
+    const profileName = agent.config.profileName;
+    if (profileName === undefined) return undefined;
+    if (meta.type === 'sub') {
+      const parentProfileName = parentAgent?.config.profileName;
+      return (
+        DEFAULT_AGENT_PROFILES[parentProfileName ?? 'agent']?.subagents?.[profileName] ??
+        DEFAULT_AGENT_PROFILES['agent']?.subagents?.[profileName]
+      );
+    }
+    return DEFAULT_AGENT_PROFILES[profileName];
+  }
+
   private nextGeneratedAgentId(): string {
     while (true) {
       const id = `agent-${this.agentIdCounter++}`;
diff --git a/packages/agent-core/src/session/subagent-host.ts b/packages/agent-core/src/session/subagent-host.ts
index 5153acea5..01c3063f3 100644
--- a/packages/agent-core/src/session/subagent-host.ts
+++ b/packages/agent-core/src/session/subagent-host.ts
@@ -374,7 +374,7 @@ export class SessionSubagentHost {
       this.session.options.kimiHomeDir,
       { additionalDirs: child.getAdditionalDirs() },
     );
-    child.useProfile(profile, context);
+    child.useProfile(profile, context, this.session.options.kimiHomeDir);
     child.tools.inheritUserTools(parent.tools);
   }
 
diff --git a/packages/agent-core/src/utils/tokens.ts b/packages/agent-core/src/utils/tokens.ts
index fe567f732..af8b70152 100644
--- a/packages/agent-core/src/utils/tokens.ts
+++ b/packages/agent-core/src/utils/tokens.ts
@@ -1,6 +1,19 @@
 import type { ContentPart, Message, Tool } from '@moonshot-ai/kosong';
 
-const messageTokenEstimateCache = new WeakMap<Message, number>();
+/**
+ * Structural subset of kosong's {@link Message} that token estimation reads.
+ * Accepting the subset (instead of the full `Message`) lets callers with
+ * message-shaped objects — such as the compaction helpers in `memento.ts`,
+ * which carry only `role`/`content`/`origin` — estimate tokens without an
+ * unsafe cast, while full `Message` values still satisfy it.
+ */
+interface TokenEstimatableMessage {
+  readonly role: string;
+  readonly content: readonly ContentPart[];
+  readonly toolCalls?: readonly { readonly name: string; readonly arguments: unknown }[];
+}
+
+const messageTokenEstimateCache = new WeakMap<TokenEstimatableMessage, number>();
 
 /**
  * Estimate token count from text using a character-based heuristic.
@@ -41,7 +54,7 @@ export function estimateTokensForTools(tools: readonly Tool[]): number {
   return total;
 }
 
-export function estimateTokensForMessage(message: Message): number {
+export function estimateTokensForMessage(message: TokenEstimatableMessage): number {
   const cached = messageTokenEstimateCache.get(message);
   if (cached !== undefined) {
     return cached;
diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts
index 8adf12c6e..284c48cdd 100644
--- a/packages/agent-core/test/agent/compaction/full.test.ts
+++ b/packages/agent-core/test/agent/compaction/full.test.ts
@@ -18,10 +18,14 @@ import { afterEach, describe, expect, it, vi } from 'vitest';
 
 import type { KimiConfig } from '../../../src/config';
 import type { AgentOptions } from '../../../src/agent';
-import { DefaultCompactionStrategy, type CompactionStrategy } from '../../../src/agent/compaction';
+import {
+  COMPACTION_SUMMARY_PREFIX,
+  DefaultCompactionStrategy,
+  type CompactionStrategy,
+} from '../../../src/agent/compaction';
 import { FLAG_DEFINITIONS, MASTER_ENV } from '../../../src/flags';
 import { HookEngine, type HookEngineTriggerArgs } from '../../../src/session/hooks';
-import { estimateTokensForMessages } from '../../../src/utils/tokens';
+import { estimateTokens, estimateTokensForMessages } from '../../../src/utils/tokens';
 import { recordingTelemetry, type TelemetryRecord } from '../../fixtures/telemetry';
 import type { TestAgentContext, TestAgentOptions } from '../harness/agent';
 import { testAgent } from '../harness/agent';
@@ -44,138 +48,6 @@ const CATALOGUED_MODEL_CAPABILITIES = {
 const MICRO_COMPACTION_FLAG_ENV = getMicroCompactionFlagEnv();
 
 describe('FullCompaction', () => {
-  it('keeps an oversized trailing user message as recent', () => {
-    const strategy = testCompactionStrategy();
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', `pending user ${'x'.repeat(1_200)}`),
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('keeps consecutive trailing user messages as recent', () => {
-    const strategy = testCompactionStrategy();
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', `pending user one ${'x'.repeat(1_200)}`),
-      textMessage('user', `pending user two ${'x'.repeat(1_200)}`),
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('compacts the prefix when the trailing exchange itself is oversized', () => {
-    const strategy = testCompactionStrategy();
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', 'recent user'),
-      textMessage('assistant', `recent assistant ${'x'.repeat(1_200)}`),
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('returns 0 when there is nothing to compact', () => {
-    const strategy = testCompactionStrategy();
-    expect(strategy.computeCompactCount([], 'auto')).toBe(0);
-    expect(strategy.computeCompactCount([textMessage('user', 'only pending')], 'auto')).toBe(0);
-    expect(
-      strategy.computeCompactCount(
-        [
-          textMessage('user', 'a'),
-          textMessage('user', 'b'),
-          textMessage('user', 'c'),
-        ],
-        'auto',
-      ),
-    ).toBe(0);
-  });
-
-  it('returns 0 when no intermediate split exists and the last message is also unsplittable', () => {
-    const strategy = testCompactionStrategy();
-    const messages: Message[] = [
-      textMessage('user', 'inspect'),
-      {
-        role: 'assistant',
-        content: [],
-        toolCalls: [{ type: 'function', id: 'call_a', name: 'Lookup', arguments: '{}' }],
-      },
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(0);
-  });
-
-  it('does not split inside a parallel tool exchange', () => {
-    const strategy = testCompactionStrategy();
-    const messages: Message[] = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', 'run both tools'),
-      {
-        role: 'assistant',
-        content: [],
-        toolCalls: [
-          { type: 'function', id: 'call_a', name: 'Lookup', arguments: '{}' },
-          { type: 'function', id: 'call_b', name: 'Lookup', arguments: '{}' },
-        ],
-      },
-      { role: 'tool', content: [{ type: 'text', text: 'a' }], toolCalls: [], toolCallId: 'call_a' },
-      { role: 'tool', content: [{ type: 'text', text: 'b' }], toolCalls: [], toolCallId: 'call_b' },
-      textMessage('user', 'next prompt'),
-    ];
-
-    // The only valid split is before the parallel exchange (after 'old assistant'),
-    // never between tool_a and tool_b — that would leave tool_b as an orphan.
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('reserves response context by default before the ratio threshold is reached', () => {
-    const strategy = new DefaultCompactionStrategy(() => 256_000);
-
-    expect(strategy.shouldCompact(210_000)).toBe(true);
-    expect(strategy.shouldBlock(210_000)).toBe(true);
-  });
-
-  it('backs off overflow compaction by at least five percent of the context window', () => {
-    const strategy = testCompactionStrategy(1_000);
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      ...Array.from({ length: 20 }, () => [
-        textMessage('user', 'continue'),
-        textMessage('assistant', ''),
-      ]).flat(),
-    ];
-
-    const reduced = strategy.reduceCompactOnOverflow(messages);
-    const removed = messages.slice(reduced);
-
-    expect(reduced).toBeGreaterThan(0);
-    expect(estimateTokensForMessages(removed)).toBeGreaterThanOrEqual(50);
-  });
-
-  it('ignores reserved context when the reserve is not smaller than the model window', () => {
-    const strategy = new DefaultCompactionStrategy(() => 32_000, {
-      triggerRatio: 0.85,
-      blockRatio: 0.85,
-      reservedContextSize: 50_000,
-      maxCompactionPerTurn: 3,
-      maxRecentMessages: 3,
-      maxRecentUserMessages: Infinity,
-      maxRecentSizeRatio: 0.2,
-      minOverflowReductionRatio: 0.05,
-    });
-
-    expect(strategy.shouldCompact(1)).toBe(false);
-    expect(strategy.shouldBlock(1)).toBe(false);
-    expect(strategy.shouldCompact(28_000)).toBe(true);
-    expect(strategy.shouldBlock(28_000)).toBe(true);
-  });
-
   it('runs manual compaction and applies the compacted context', async () => {
     const records: TelemetryRecord[] = [];
     const ctx = testAgent({ telemetry: recordingTelemetry(records) });
@@ -204,12 +76,12 @@ describe('FullCompaction', () => {
       [wire] context.append_message     { "message": { "role": "user", "content": [ { "type": "text", "text": "recent user three" } ], "toolCalls": [], "origin": { "kind": "user" } }, "time": "<time>" }
       [wire] full_compaction.begin      { "source": "manual", "instruction": "Keep the important test facts.", "time": "<time>" }
       [emit] compaction.started         { "trigger": "manual", "instruction": "Keep the important test facts." }
-      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 520, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 120, "maxContextTokens": 256000, "contextUsage": 0.00046875, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 520, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 520, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction   { "summary": "Compacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 5, "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 5, "maxContextTokens": 256000, "contextUsage": 0.00001953125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 520, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 520, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 507, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 120, "maxContextTokens": 256000, "contextUsage": 0.00046875, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 507, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 507, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] context.apply_compaction   { "summary": "Compacted summary.", "contextSummary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 119, "keptUserMessageCount": 3, "time": "<time>" }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 119, "maxContextTokens": 256000, "contextUsage": 0.00046484375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 507, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 507, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete   { "time": "<time>" }
-      [emit] compaction.completed       { "result": { "summary": "Compacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 5 } }
+      [emit] compaction.completed       { "result": { "summary": "Compacted summary.", "compactedCount": 6, "tokensBefore": 39, "tokensAfter": 119, "keptUserMessageCount": 3 } }
     `);
     expect(ctx.lastLlmInput()).toMatchInlineSnapshot(`
       system: <system-prompt>
@@ -221,13 +93,26 @@ describe('FullCompaction', () => {
         assistant: text "old assistant two"
         user: text "recent user three"
         assistant: text "recent assistant three"
-        user: text <compaction-instruction>
+        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nYou are now given a task to compact this conversation context according to the priorities and output requirements below.\\n\\nThe goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared.\\n\\nCompression priorities, in order:\\n\\n1. Current Task State: what is being worked on right now\\n2. Errors & Solutions: unresolved or recurring errors and their resolutions\\n3. Code Evolution: final working versions only; remove intermediate attempts\\n4. System Context: project structure, dependencies, environment setup\\n5. Design Decisions: architectural choices and their rationale\\n6. TODO Items: unfinished tasks and known issues\\n\\nRequired output structure:\\n\\n## Current Focus\\n\\n[What we're working on now]\\n\\n## Environment\\n\\n- [Key setup/config points]\\n- ...\\n\\n## Completed Tasks\\n\\n- [Task]: [Brief outcome]\\n- ...\\n\\n## Active Issues\\n\\n- [Issue]: [Status/Next steps]\\n- ...\\n\\n## Code State\\n\\n### [Critical file name]\\n\\n[Brief description of the file's purpose and current state]\\n\\n\`\`\`\\n[The latest version of critical code snippets in this file, <20 lines]\\n\`\`\`\\n\\n### [Critical file name]\\n\\n- [Useful classes/methods/functions]: [Brief description/usage]\\n- ...\\n\\nOmit non-critical code, intermediate attempts, and resolved errors.\\n\\n## Important Context\\n\\n- [Any crucial information not covered above]\\n- ...\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. 
Do not call any tools — you already have everything you need in the conversation history.\\n\\n\\nOptional user instruction:\\nKeep the important test facts."
     `);
     expect(ctx.compactHistory()).toMatchInlineSnapshot(`
       [
         {
-          "role": "assistant",
-          "text": "Compacted summary.",
+          "role": "user",
+          "text": "old user one",
+        },
+        {
+          "role": "user",
+          "text": "old user two",
+        },
+        {
+          "role": "user",
+          "text": "recent user three",
+        },
+        {
+          "role": "user",
+          "text": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:
+      Compacted summary.",
         },
       ]
     `);
@@ -236,18 +121,169 @@ describe('FullCompaction', () => {
       properties: expect.objectContaining({
         source: 'manual',
         tokens_before: 39,
-        tokens_after: 5,
+        tokens_after: 119,
         duration_ms: expect.any(Number),
         compacted_count: 6,
         retry_count: 0,
         thinking_level: 'off',
-        input_tokens: 520,
+        input_tokens: 507,
         output_tokens: 8,
       }),
     });
     await ctx.expectResumeMatches();
   });
 
+  it('emits the raw summary while keeping the prefixed summary in model context', async () => {
+    const ctx = testAgent();
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
+
+    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' });
+    await ctx.rpc.beginCompaction({});
+    await ctx.once('compaction.completed');
+
+    const completedEvent = ctx.allEvents.find((entry) => entry.event === 'compaction.completed');
+    expect(completedEvent?.args).toEqual({
+      result: expect.objectContaining({
+        summary: 'Compacted summary.',
+      }),
+    });
+    expect(completedEvent?.args).not.toEqual({
+      result: expect.objectContaining({
+        summary: expect.stringContaining(COMPACTION_SUMMARY_PREFIX),
+      }),
+    });
+    expect(ctx.agent.context.history.at(-1)?.content).toEqual([
+      { type: 'text', text: `${COMPACTION_SUMMARY_PREFIX}\nCompacted summary.` },
+    ]);
+  });
+
+  it('keeps only real user input and re-injects permission reminders after compaction', async () => {
+    const ctx = testAgent();
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.appendExchange(1, 'real user one', 'assistant one', 20);
+    ctx.agent.context.appendBashInput('pwd');
+    ctx.agent.context.appendBashOutput('/tmp/repo', '', false);
+    ctx.agent.context.appendLocalCommandStdout('local command output');
+    ctx.agent.context.appendSystemReminder('stale reminder', {
+      kind: 'injection',
+      variant: 'system_reminder',
+    });
+    ctx.agent.context.appendUserMessage([{ type: 'text', text: 'background task done' }], {
+      kind: 'background_task',
+      taskId: 'task-1',
+      status: 'completed',
+      notificationId: 'notification-1',
+    });
+    ctx.agent.context.appendUserMessage([{ type: 'text', text: 'real user two' }]);
+    ctx.agent.permission.setMode('auto');
+
+    const permissionReminder = new Promise<void>((resolve) => {
+      const handler = (entry: unknown) => {
+        const record = entry as {
+          event?: string;
+          args?: { message?: { origin?: { kind?: string; variant?: string } } };
+        };
+        const origin = record.args?.message?.origin;
+        if (
+          record.event === 'context.append_message' &&
+          origin?.kind === 'injection' &&
+          origin.variant === 'permission_mode'
+        ) {
+          ctx.emitter.off('context.append_message', handler);
+          resolve();
+        }
+      };
+      ctx.emitter.on('context.append_message', handler);
+    });
+
+    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' });
+    await ctx.rpc.beginCompaction({});
+    await ctx.once('compaction.completed');
+    await permissionReminder;
+
+    expect(ctx.agent.context.history.map((message) => message.origin?.kind ?? 'user')).toEqual([
+      'user',
+      'user',
+      'compaction_summary',
+      'injection',
+    ]);
+    expect(
+      ctx.agent.context.history.map((message) =>
+        message.origin?.kind === 'injection' ? message.origin.variant : undefined,
+      ),
+    ).toEqual([undefined, undefined, undefined, 'permission_mode']);
+
+    const applyCompaction = ctx.allEvents.findLast(
+      (entry) => entry.type === '[wire]' && entry.event === 'context.apply_compaction',
+    ); // latest record wins: search from the end instead of copying and reversing
+    expect(applyCompaction).toBeDefined();
+    const record = applyCompaction?.args as {
+      keptUserMessageCount?: number;
+      tokensAfter?: number;
+      summary?: string;
+      contextSummary?: string;
+    };
+    expect(record.keptUserMessageCount).toBe(2);
+    const expectedContextSummary = `${COMPACTION_SUMMARY_PREFIX}\nCompacted summary.`;
+    expect(record.summary).toBe('Compacted summary.');
+    expect(record.contextSummary).toBe(expectedContextSummary);
+    expect(record.tokensAfter).toBe(
+      estimateTokens(expectedContextSummary) +
+        estimateTokensForMessages(ctx.agent.context.history.slice(0, 2)),
+    );
+  });
+
+  it('refreshes the system prompt after compaction completes', async () => {
+    const ctx = testAgent();
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
+    ctx.appendExchange(2, 'recent user two', 'recent assistant two', 40);
+
+    const refreshSpy = vi.spyOn(ctx.agent, 'refreshSystemPrompt');
+
+    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' });
+    await ctx.rpc.beginCompaction({});
+    await ctx.once('compaction.completed');
+
+    expect(refreshSpy).toHaveBeenCalledTimes(1);
+  });
+
+  it('does not reset active tools while refreshing the system prompt after compaction', async () => {
+    const ctx = testAgent();
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.agent.useProfile({
+      name: 'tool-profile',
+      systemPrompt: () => '<profile-prompt>',
+      tools: ['Read', 'Write'],
+    });
+    ctx.agent.tools.setActiveTools(['Read']);
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
+
+    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' });
+    await ctx.rpc.beginCompaction({});
+    await ctx.once('compaction.completed');
+
+    const activeTools = ctx.agent.tools
+      .data()
+      .filter((tool) => tool.active)
+      .map((tool) => tool.name)
+      .toSorted();
+    expect(activeTools).toEqual(['Read']);
+  });
+
   it('projects the compacted prefix before sending the summary request', async () => {
     const ctx = testAgent({ compactionStrategy: alwaysCompactOnce });
     ctx.configure({
@@ -385,7 +421,9 @@ describe('FullCompaction', () => {
     expect(authKeys).toEqual(['fresh-token', 'forced-refresh-token', 'fresh-token']);
     expect(tokenCalls).toEqual([undefined, true, undefined]);
     expect(ctx.compactHistory()).toEqual([
-      { role: 'assistant', text: 'Recovered compacted summary.' },
+      { role: 'user', text: 'old user one' },
+      { role: 'user', text: 'recent user two' },
+      { role: 'user', text: `${COMPACTION_SUMMARY_PREFIX}\nRecovered compacted summary.` },
     ]);
     await ctx.expectResumeMatches();
   });
@@ -547,20 +585,22 @@ describe('FullCompaction', () => {
     await completed;
 
     expect(attempts).toBe(3);
-    // Each empty summary shrinks the compacted prefix before retrying, so the
-    // recovered summary compacts only the older exchange and leaves the recent
-    // one in history.
+    // Empty summaries are retried without shrinking the live history; the
+    // recovered summary replaces the whole history with the real user messages
+    // plus the prefixed summary.
     expect(ctx.compactHistory()).toEqual([
-      { role: 'assistant', text: 'Recovered compacted summary.' },
+      { role: 'user', text: 'old user one' },
       { role: 'user', text: 'recent user two' },
-      { role: 'assistant', text: 'recent assistant two' },
+      { role: 'user', text: `${COMPACTION_SUMMARY_PREFIX}\nRecovered compacted summary.` },
     ]);
     expect(
       ctx.allEvents.filter((event) => event.event === 'compaction.completed'),
     ).toEqual([
       expect.objectContaining({
         args: expect.objectContaining({
-          result: expect.objectContaining({ summary: 'Recovered compacted summary.' }),
+          // The completed event carries the raw summary (the prefix is context-only), so match
+          // it exactly: a substring match would let a leaked prefix pass unnoticed.
+          result: expect.objectContaining({ summary: 'Recovered compacted summary.' }),
         }),
       }),
     ]);
@@ -603,12 +643,12 @@ describe('FullCompaction', () => {
     await completed;
 
     expect(inputs).toHaveLength(2);
-    // The retry compacts a strictly smaller prefix than the first attempt.
+    // The retry sends a strictly smaller input than the first attempt.
     expect(inputs[1]!.length).toBeLessThan(inputs[0]!.length);
     expect(ctx.compactHistory()).toEqual([
-      { role: 'assistant', text: 'Recovered compacted summary.' },
+      { role: 'user', text: 'old user one' },
       { role: 'user', text: 'recent user two' },
-      { role: 'assistant', text: 'recent assistant two' },
+      { role: 'user', text: `${COMPACTION_SUMMARY_PREFIX}\nRecovered compacted summary.` },
     ]);
     await ctx.expectResumeMatches();
   });
@@ -640,8 +680,10 @@ describe('FullCompaction', () => {
     await vi.advanceTimersByTimeAsync(60_000);
     await failed;
 
-    // MAX_COMPACTION_RETRY_ATTEMPTS attempts, with prefix reduction between them.
-    expect(inputs).toHaveLength(5);
+    // Each empty/think-only response drops the oldest item and resets the retry
+    // counter; once only one item remains, MAX_COMPACTION_RETRY_ATTEMPTS more
+    // retries run before failing. 3 drops + 5 retries = 8 generate calls.
+    expect(inputs).toHaveLength(8);
     expect(inputs[1]!.length).toBeLessThan(inputs[0]!.length);
     expect(records).toContainEqual({
       event: 'compaction_failed',
@@ -831,7 +873,9 @@ describe('FullCompaction', () => {
     await vi.advanceTimersByTimeAsync(60_000);
     const events = await ctx.untilTurnEnd();
 
-    expect(attempts).toBe(5);
+    // A single-item history cannot be shrunk further, so the truncated response
+    // fails immediately instead of looping through retries.
+    expect(attempts).toBe(1);
     expect(events).toContainEqual(
       expect.objectContaining({
         event: 'turn.ended',
@@ -929,13 +973,18 @@ describe('FullCompaction', () => {
       messages:
         user: text "old user one"
         assistant: text "old assistant one"
-        user: text <compaction-instruction>
+        user: text "run both tools"
+        assistant: []  calls call_open_one:LookupOne { "query": "one" }, call_open_two:LookupTwo { "query": "two" }
+        tool[call_open_one]: text "one result"
+        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nYou are now given a task to compact this conversation context according to the priorities and output requirements below.\\n\\nThe goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared.\\n\\nCompression priorities, in order:\\n\\n1. Current Task State: what is being worked on right now\\n2. Errors & Solutions: unresolved or recurring errors and their resolutions\\n3. Code Evolution: final working versions only; remove intermediate attempts\\n4. System Context: project structure, dependencies, environment setup\\n5. Design Decisions: architectural choices and their rationale\\n6. TODO Items: unfinished tasks and known issues\\n\\nRequired output structure:\\n\\n## Current Focus\\n\\n[What we're working on now]\\n\\n## Environment\\n\\n- [Key setup/config points]\\n- ...\\n\\n## Completed Tasks\\n\\n- [Task]: [Brief outcome]\\n- ...\\n\\n## Active Issues\\n\\n- [Issue]: [Status/Next steps]\\n- ...\\n\\n## Code State\\n\\n### [Critical file name]\\n\\n[Brief description of the file's purpose and current state]\\n\\n\`\`\`\\n[The latest version of critical code snippets in this file, <20 lines]\\n\`\`\`\\n\\n### [Critical file name]\\n\\n- [Useful classes/methods/functions]: [Brief description/usage]\\n- ...\\n\\nOmit non-critical code, intermediate attempts, and resolved errors.\\n\\n## Important Context\\n\\n- [Any crucial information not covered above]\\n- ...\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. 
Do not call any tools — you already have everything you need in the conversation history.\\n\\n\\nOptional user instruction:\\nKeep stable facts."
     `);
+    // The unresolved tool exchange is sent to the model (see the compaction input
+    // above) but is dropped from the replacement history, leaving only the real
+    // user messages followed by the compaction summary.
     expect(ctx.agent.context.history.map((message) => message.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
+      'user',
+      'user',
     ]);
     ctx.dispatch({
       type: 'context.append_loop_event',
@@ -947,11 +996,9 @@ describe('FullCompaction', () => {
       },
     });
     expect(ctx.agent.context.history.map((message) => message.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
-      'tool',
+      'user',
+      'user',
     ]);
     await ctx.expectResumeMatches();
   });
@@ -979,12 +1026,12 @@ describe('FullCompaction', () => {
       [wire] full_compaction.begin      { "source": "manual", "time": "<time>" }
       [emit] compaction.started         { "trigger": "manual" }
       [wire] context.append_message     { "message": { "role": "user", "content": [ { "type": "text", "text": "new user while compacting" } ], "toolCalls": [], "origin": { "kind": "user" } }, "time": "<time>" }
-      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 499, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 80, "maxContextTokens": 256000, "contextUsage": 0.0003125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 499, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 499, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction   { "summary": "Compacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 5, "time": "<time>" }
-      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 5, "maxContextTokens": 256000, "contextUsage": 0.00001953125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 499, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 499, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record               { "model": "kimi-code", "usage": { "inputOther": 478, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 80, "maxContextTokens": 256000, "contextUsage": 0.0003125, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 478, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 478, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] context.apply_compaction   { "summary": "Compacted prefix.", "contextSummary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nCompacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 122, "keptUserMessageCount": 3, "time": "<time>" }
+      [emit] agent.status.updated       { "model": "kimi-code", "contextTokens": 122, "maxContextTokens": 256000, "contextUsage": 0.0004765625, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 478, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 478, "output": 8, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete   { "time": "<time>" }
-      [emit] compaction.completed       { "result": { "summary": "Compacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 5 } }
+      [emit] compaction.completed       { "result": { "summary": "Compacted prefix.", "compactedCount": 4, "tokensBefore": 25, "tokensAfter": 122, "keptUserMessageCount": 3 } }
     `);
     expect(ctx.lastLlmInput()).toMatchInlineSnapshot(`
       system: <system-prompt>
@@ -994,116 +1041,32 @@ describe('FullCompaction', () => {
         assistant: text "old assistant one"
         user: text "recent user two"
         assistant: text "recent assistant two"
-        user: text <compaction-instruction>
+        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nYou are now given a task to compact this conversation context according to the priorities and output requirements below.\\n\\nThe goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared.\\n\\nCompression priorities, in order:\\n\\n1. Current Task State: what is being worked on right now\\n2. Errors & Solutions: unresolved or recurring errors and their resolutions\\n3. Code Evolution: final working versions only; remove intermediate attempts\\n4. System Context: project structure, dependencies, environment setup\\n5. Design Decisions: architectural choices and their rationale\\n6. TODO Items: unfinished tasks and known issues\\n\\nRequired output structure:\\n\\n## Current Focus\\n\\n[What we're working on now]\\n\\n## Environment\\n\\n- [Key setup/config points]\\n- ...\\n\\n## Completed Tasks\\n\\n- [Task]: [Brief outcome]\\n- ...\\n\\n## Active Issues\\n\\n- [Issue]: [Status/Next steps]\\n- ...\\n\\n## Code State\\n\\n### [Critical file name]\\n\\n[Brief description of the file's purpose and current state]\\n\\n\`\`\`\\n[The latest version of critical code snippets in this file, <20 lines]\\n\`\`\`\\n\\n### [Critical file name]\\n\\n- [Useful classes/methods/functions]: [Brief description/usage]\\n- ...\\n\\nOmit non-critical code, intermediate attempts, and resolved errors.\\n\\n## Important Context\\n\\n- [Any crucial information not covered above]\\n- ...\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history."
     `);
     expect(ctx.compactHistory()).toMatchInlineSnapshot(`
       [
         {
-          "role": "assistant",
-          "text": "Compacted prefix.",
+          "role": "user",
+          "text": "old user one",
+        },
+        {
+          "role": "user",
+          "text": "recent user two",
         },
         {
           "role": "user",
           "text": "new user while compacting",
         },
+        {
+          "role": "user",
+          "text": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:
+      Compacted prefix.",
+        },
       ]
     `);
     await ctx.expectResumeMatches();
   });
 
-  it('continues a manual compaction run when the first pass still exceeds the trigger', async () => {
-    const ctx = testAgent();
-    ctx.configure({
-      provider: CATALOGUED_PROVIDER,
-      modelCapabilities: {
-        ...CATALOGUED_MODEL_CAPABILITIES,
-        max_context_tokens: 4_000,
-      },
-    });
-    ctx.appendExchange(
-      1,
-      `old user one ${'u'.repeat(14_000)}`,
-      `old assistant one ${'a'.repeat(14_000)}`,
-      6_000,
-    );
-    const firstSummary = `large manual summary ${'x'.repeat(14_000)}`;
-    let appliedCount = 0;
-    const secondCompacted = new Promise<void>((resolve) => {
-      const handler = () => {
-        appliedCount += 1;
-        if (appliedCount === 2) {
-          ctx.emitter.off('context.apply_compaction', handler);
-          resolve();
-        }
-      };
-      ctx.emitter.on('context.apply_compaction', handler);
-    });
-
-    ctx.mockNextResponse({ type: 'text', text: firstSummary });
-    ctx.mockNextResponse({ type: 'text', text: 'Second manual summary.' });
-    const completed = ctx.once('compaction.completed');
-    await ctx.rpc.beginCompaction({});
-    ctx.appendExchange(2, 'new user while compacting', 'new assistant while compacting', 6_000);
-    await secondCompacted;
-    await completed;
-
-    const events = ctx.newEvents();
-    expect(countEvents(events, 'context.apply_compaction')).toBe(2);
-    expect(countEvents(events, 'compaction.started')).toBe(1);
-    expect(countEvents(events, 'compaction.completed')).toBe(1);
-    expect(ctx.llmCalls).toHaveLength(2);
-    const [firstCompactionCall, secondCompactionCall] = ctx.llmCalls;
-    expect(firstCompactionCall?.history.map(messageText)).not.toContain('new user while compacting');
-    expect(secondCompactionCall?.history.map(messageText)).toContain(firstSummary);
-    expect(secondCompactionCall?.history.map(messageText)).toContain('new user while compacting');
-    expect(secondCompactionCall?.history.map(messageText)).toContain('new assistant while compacting');
-    expect(ctx.compactHistory()).toEqual([
-      {
-        role: 'assistant',
-        text: 'Second manual summary.',
-      },
-    ]);
-    await ctx.expectResumeMatches();
-  });
-
-  it('auto-compacts very large context in window-sized rounds', async () => {
-    const maxContextTokens = 4_000;
-    const ctx = testAgent();
-    ctx.configure({
-      provider: CATALOGUED_PROVIDER,
-      modelCapabilities: {
-        ...CATALOGUED_MODEL_CAPABILITIES,
-        max_context_tokens: maxContextTokens,
-      },
-    });
-    for (let i = 1; i <= 22; i++) {
-      ctx.appendAssistantTextWithUsage(
-        i,
-        `history chunk ${String(i)} ${'x'.repeat(7_200)}`,
-        i * 1_850,
-      );
-    }
-    const initialTokens = estimateTokensForMessages(ctx.agent.context.history);
-    const completed = ctx.once('compaction.completed');
-    for (let i = 1; i <= 30; i++) {
-      ctx.mockNextResponse({ type: 'text', text: `Auto summary ${String(i)}.` });
-    }
-
-    ctx.agent.fullCompaction.begin({ source: 'auto', instruction: undefined });
-    await completed;
-
-    const events = ctx.newEvents();
-    const compactedPrefixSizes = ctx.llmCalls.map((call) =>
-      estimateTokensForMessages(call.history.slice(0, -1)),
-    );
-    expect(initialTokens).toBeGreaterThan(maxContextTokens * 9);
-    expect(countEvents(events, 'context.apply_compaction')).toBeGreaterThan(1);
-    expect(countEvents(events, 'compaction.completed')).toBe(1);
-    expect(compactedPrefixSizes.length).toBeGreaterThan(1);
-    expect(compactedPrefixSizes.every((size) => size <= maxContextTokens)).toBe(true);
-    expect(ctx.agent.context.tokenCount).toBeLessThan(maxContextTokens * 0.85);
-    await ctx.expectResumeMatches();
-  });
 
   it('cancels when the compacted prefix changes before completion', async () => {
     const ctx = testAgent();
@@ -1127,8 +1090,8 @@ describe('FullCompaction', () => {
       [emit] compaction.started       { "trigger": "manual" }
       [wire] context.clear            { "time": "<time>" }
       [emit] agent.status.updated     { "model": "kimi-code", "contextTokens": 0, "maxContextTokens": 256000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual" }
-      [wire] usage.record             { "model": "kimi-code", "usage": { "inputOther": 499, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated     { "model": "kimi-code", "contextTokens": 0, "maxContextTokens": 256000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 499, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 499, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record             { "model": "kimi-code", "usage": { "inputOther": 478, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated     { "model": "kimi-code", "contextTokens": 0, "maxContextTokens": 256000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 478, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 478, "output": 7, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.cancel   { "time": "<time>" }
       [emit] compaction.cancelled     {}
     `);
@@ -1140,7 +1103,7 @@ describe('FullCompaction', () => {
         assistant: text "old assistant one"
         user: text "recent user two"
         assistant: text "recent assistant two"
-        user: text <compaction-instruction>
+        user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nYou are now given a task to compact this conversation context according to the priorities and output requirements below.\\n\\nThe goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared.\\n\\nCompression priorities, in order:\\n\\n1. Current Task State: what is being worked on right now\\n2. Errors & Solutions: unresolved or recurring errors and their resolutions\\n3. Code Evolution: final working versions only; remove intermediate attempts\\n4. System Context: project structure, dependencies, environment setup\\n5. Design Decisions: architectural choices and their rationale\\n6. TODO Items: unfinished tasks and known issues\\n\\nRequired output structure:\\n\\n## Current Focus\\n\\n[What we're working on now]\\n\\n## Environment\\n\\n- [Key setup/config points]\\n- ...\\n\\n## Completed Tasks\\n\\n- [Task]: [Brief outcome]\\n- ...\\n\\n## Active Issues\\n\\n- [Issue]: [Status/Next steps]\\n- ...\\n\\n## Code State\\n\\n### [Critical file name]\\n\\n[Brief description of the file's purpose and current state]\\n\\n\`\`\`\\n[The latest version of critical code snippets in this file, <20 lines]\\n\`\`\`\\n\\n### [Critical file name]\\n\\n- [Useful classes/methods/functions]: [Brief description/usage]\\n- ...\\n\\nOmit non-critical code, intermediate attempts, and resolved errors.\\n\\n## Important Context\\n\\n- [Any crucial information not covered above]\\n- ...\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history."
     `);
     expect(ctx.compactHistory()).toMatchInlineSnapshot(`[]`);
     await ctx.expectResumeMatches();
@@ -1171,20 +1134,20 @@ describe('FullCompaction', () => {
       [wire] full_compaction.begin       { "source": "auto", "time": "<time>" }
       [emit] compaction.started          { "trigger": "auto" }
       [emit] compaction.blocked          { "turnId": 0 }
-      [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 498, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 950000, "maxContextTokens": 256000, "contextUsage": 3.7109375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 498, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 498, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction    { "summary": "Auto compacted summary.", "compactedCount": 4, "tokensBefore": 46, "tokensAfter": 28, "time": "<time>" }
-      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 28, "maxContextTokens": 256000, "contextUsage": 0.000109375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 498, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 498, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 499, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 950000, "maxContextTokens": 256000, "contextUsage": 3.7109375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 499, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 499, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] context.apply_compaction    { "summary": "Auto compacted summary.", "contextSummary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nAuto compacted summary.", "compactedCount": 7, "tokensBefore": 46, "tokensAfter": 127, "keptUserMessageCount": 4, "time": "<time>" }
+      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 127, "maxContextTokens": 256000, "contextUsage": 0.00049609375, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 499, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 499, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete    { "time": "<time>" }
-      [emit] compaction.completed        { "result": { "summary": "Auto compacted summary.", "compactedCount": 4, "tokensBefore": 46, "tokensAfter": 28 } }
+      [emit] compaction.completed        { "result": { "summary": "Auto compacted summary.", "compactedCount": 7, "tokensBefore": 46, "tokensAfter": 127, "keptUserMessageCount": 4 } }
       [wire] context.append_loop_event   { "event": { "type": "step.begin", "uuid": "<uuid-1>", "turnId": "0", "step": 1 }, "time": "<time>" }
       [emit] turn.step.started           { "turnId": 0, "step": 1, "stepId": "<uuid-1>" }
       [emit] assistant.delta             { "turnId": 0, "delta": "I can answer after compaction." }
       [wire] context.append_loop_event   { "event": { "type": "content.part", "uuid": "<uuid-2>", "turnId": "0", "step": 1, "stepUuid": "<uuid-1>", "part": { "type": "text", "text": "I can answer after compaction." } }, "time": "<time>" }
-      [wire] context.append_loop_event   { "event": { "type": "step.end", "uuid": "<uuid-1>", "turnId": "0", "step": 1, "usage": { "inputOther": 31, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "end_turn" }, "time": "<time>" }
-      [emit] turn.step.completed         { "turnId": 0, "step": 1, "stepId": "<uuid-1>", "usage": { "inputOther": 31, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "end_turn" }
-      [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 31, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "turn", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 42, "maxContextTokens": 256000, "contextUsage": 0.0001640625, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 529, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 529, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 31, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] context.append_loop_event   { "event": { "type": "step.end", "uuid": "<uuid-1>", "turnId": "0", "step": 1, "usage": { "inputOther": 126, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "end_turn" }, "time": "<time>" }
+      [emit] turn.step.completed         { "turnId": 0, "step": 1, "stepId": "<uuid-1>", "usage": { "inputOther": 126, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "end_turn" }
+      [wire] usage.record                { "model": "kimi-code", "usage": { "inputOther": 126, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "turn", "time": "<time>" }
+      [emit] agent.status.updated        { "model": "kimi-code", "contextTokens": 137, "maxContextTokens": 256000, "contextUsage": 0.00053515625, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "kimi-code": { "inputOther": 625, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 625, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 126, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [emit] turn.ended                  { "turnId": 0, "reason": "completed" }
     `);
     expect(ctx.llmInputs()).toMatchInlineSnapshot(`
@@ -1196,22 +1159,23 @@ describe('FullCompaction', () => {
           assistant: text "old assistant one"
           user: text "old user two"
           assistant: text "old assistant two"
-          user: text <compaction-instruction>
-
-      call 2:
-        messages:
-          assistant: text "Auto compacted summary."
           user: text "recent user three"
           assistant: text "recent assistant three"
           user: text "Answer after compacting"
+          user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nYou are now given a task to compact this conversation context according to the priorities and output requirements below.\\n\\nThe goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared.\\n\\nCompression priorities, in order:\\n\\n1. Current Task State: what is being worked on right now\\n2. Errors & Solutions: unresolved or recurring errors and their resolutions\\n3. Code Evolution: final working versions only; remove intermediate attempts\\n4. System Context: project structure, dependencies, environment setup\\n5. Design Decisions: architectural choices and their rationale\\n6. TODO Items: unfinished tasks and known issues\\n\\nRequired output structure:\\n\\n## Current Focus\\n\\n[What we're working on now]\\n\\n## Environment\\n\\n- [Key setup/config points]\\n- ...\\n\\n## Completed Tasks\\n\\n- [Task]: [Brief outcome]\\n- ...\\n\\n## Active Issues\\n\\n- [Issue]: [Status/Next steps]\\n- ...\\n\\n## Code State\\n\\n### [Critical file name]\\n\\n[Brief description of the file's purpose and current state]\\n\\n\`\`\`\\n[The latest version of critical code snippets in this file, <20 lines]\\n\`\`\`\\n\\n### [Critical file name]\\n\\n- [Useful classes/methods/functions]: [Brief description/usage]\\n- ...\\n\\nOmit non-critical code, intermediate attempts, and resolved errors.\\n\\n## Important Context\\n\\n- [Any crucial information not covered above]\\n- ...\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history."
+
+      call 2:
+        messages:
+          user: text "old user one\\n\\nold user two\\n\\nrecent user three\\n\\nAnswer after compacting"
+          user: text "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nAuto compacted summary."
     `);
     expect(records).toContainEqual({
       event: 'compaction_finished',
       properties: expect.objectContaining({
         source: 'auto',
         tokens_before: 46,
-        tokens_after: 28,
-        compacted_count: 4,
+        tokens_after: 127,
+        compacted_count: 7,
         retry_count: 0,
       }),
     });
@@ -1244,15 +1208,18 @@ describe('FullCompaction', () => {
     await ctx.rpc.beginCompaction({});
     await compacted;
 
-    // Compaction preserves the in-flight tool exchange in recent; the deferred
-    // reminder still cannot land because the tool exchange is still open.
+    // Compaction drops the in-flight tool exchange and the deferred reminder
+    // (initial context is rebuilt every turn); only real user messages and
+    // the compaction summary remain.
     expect(ctx.agent.context.history.map((m) => m.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
+      'user',
+      'user',
     ]);
+    expect(ctx.agent.context.history.at(-1)?.origin).toEqual({ kind: 'compaction_summary' });
 
-    // Closing the exchange flushes the deferred reminder to history.
+    // The dropped tool calls no longer exist, so late tool results are orphans
+    // and do not change history.
     ctx.dispatch({
       type: 'context.append_loop_event',
       event: {
@@ -1273,15 +1240,9 @@ describe('FullCompaction', () => {
     });
 
     expect(ctx.agent.context.history.map((m) => m.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
-      'tool',
       'user',
-    ]);
-    expect(ctx.agent.context.history.at(-1)?.content).toEqual([
-      { type: 'text', text: '<system-reminder>\nhost note\n</system-reminder>' },
+      'user',
     ]);
   });
 
@@ -1312,13 +1273,18 @@ describe('FullCompaction', () => {
     await ctx.rpc.beginCompaction({});
     await compacted;
 
+    // Compaction drops the partially-resolved tool exchange and the deferred
+    // reminder (initial context is rebuilt every turn); only real user
+    // messages and the compaction summary remain.
     expect(ctx.agent.context.history.map((m) => m.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
+      'user',
+      'user',
     ]);
+    expect(ctx.agent.context.history.at(-1)?.origin).toEqual({ kind: 'compaction_summary' });
 
+    // The dropped tool calls no longer exist, so a late tool result is an orphan
+    // and does not change history.
     ctx.dispatch({
       type: 'context.append_loop_event',
       event: {
@@ -1330,77 +1296,101 @@ describe('FullCompaction', () => {
     });
 
     expect(ctx.agent.context.history.map((m) => m.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
-      'tool',
       'user',
-    ]);
-    expect(ctx.agent.context.history.at(-1)?.content).toEqual([
-      { type: 'text', text: '<system-reminder>\nhost note\n</system-reminder>' },
+      'user',
     ]);
   });
 
-  it('fails the turn with compaction.unable when auto compaction has no compactable prefix', async () => {
+  it('rejects manual compaction with compaction.unable when history is empty', async () => {
     const ctx = testAgent();
     ctx.configure({
       provider: CATALOGUED_PROVIDER,
-      modelCapabilities: {
-        ...CATALOGUED_MODEL_CAPABILITIES,
-        max_context_tokens: 2_000,
-      },
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
     });
-    const oversizedPrompt = `initial-pending-verbatim:${'x'.repeat(8_000)}`;
-
-    await ctx.rpc.prompt({ input: [{ type: 'text', text: oversizedPrompt }] });
-    const events = await ctx.untilTurnEnd();
 
-    expect(eventIndex(events, 'compaction.started')).toBe(-1);
+    await expect(ctx.rpc.beginCompaction({})).rejects.toMatchObject({
+      code: 'compaction.unable',
+    });
     expect(ctx.llmCalls).toHaveLength(0);
-    expect(events).toContainEqual(
-      expect.objectContaining({
-        event: 'turn.ended',
-        args: expect.objectContaining({
-          reason: 'failed',
-          error: expect.objectContaining({ code: 'compaction.unable' }),
-        }),
-      }),
-    );
-    await ctx.expectResumeMatches();
   });
 
-  it('rejects manual compaction with compaction.unable when no prefix is compactable', async () => {
+  it('compacts a single user message and keeps it ahead of the summary', async () => {
     const ctx = testAgent();
     ctx.configure({
       provider: CATALOGUED_PROVIDER,
       modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
     });
     ctx.agent.context.appendUserMessage([{ type: 'text', text: 'only pending user' }]);
-
-    await expect(ctx.rpc.beginCompaction({})).rejects.toMatchObject({
-      code: 'compaction.unable',
-    });
-    expect(ctx.llmCalls).toHaveLength(0);
-
-    ctx.agent.context.clear();
-    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
-    ctx.appendExchange(2, 'recent user two', 'recent assistant two', 80);
     const compacted = ctx.once('context.apply_compaction');
     const completed = ctx.once('compaction.completed');
 
-    ctx.mockNextResponse({ type: 'text', text: 'Compacted after no-op cancel.' });
+    ctx.mockNextResponse({ type: 'text', text: 'Single message summary.' });
     await ctx.rpc.beginCompaction({});
     await compacted;
     await completed;
 
     expect(ctx.llmCalls).toHaveLength(1);
     expect(ctx.compactHistory()).toEqual([
-      { role: 'assistant', text: 'Compacted after no-op cancel.' },
+      { role: 'user', text: 'only pending user' },
+      { role: 'user', text: `${COMPACTION_SUMMARY_PREFIX}\nSingle message summary.` },
     ]);
     await ctx.expectResumeMatches();
   });
 
+  it('reinjects the plan-mode reminder after manual compaction', async () => {
+    const ctx = testAgent();
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    await ctx.agent.planMode.enter('compact-plan', false);
+    const planFilePath = ctx.agent.planMode.planFilePath;
+    if (planFilePath === null) throw new Error('plan file path missing'); // narrow away null
+    ctx.agent.context.appendUserMessage([{ type: 'text', text: 'draft the plan' }]);
+    await ctx.agent.injection.inject();
+    expect(ctx.compactHistory().at(-1)?.text).toContain(`Plan file: ${planFilePath}`);
+    const completed = ctx.once('compaction.completed');
+    // Trigger a manual compaction and wait for the completion event subscribed to above.
+    ctx.mockNextResponse({ type: 'text', text: 'Plan-mode compacted summary.' });
+    await ctx.rpc.beginCompaction({});
+    await completed;
+    // Poll until history holds exactly one plan-mode injection that names the plan file.
+    await vi.waitFor(() => {
+      const planReminders = ctx.agent.context.history.filter(
+        (message) => message.origin?.kind === 'injection' && message.origin.variant === 'plan_mode',
+      );
+      expect(planReminders).toHaveLength(1);
+      expect(messageText(planReminders[0])).toContain(`Plan file: ${planFilePath}`);
+    });
+    expect(ctx.compactHistory().at(-1)?.text).toContain(`Plan file: ${planFilePath}`);
+    await ctx.expectResumeMatches();
+  });
+
+  it('includes the plan-mode reminder in the answer request after auto compaction', async () => {
+    const ctx = testAgent();
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    await ctx.agent.planMode.enter('auto-compact-plan', false);
+    const planFilePath = ctx.agent.planMode.planFilePath;
+    if (planFilePath === null) throw new Error('plan file path missing'); // narrow away null
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 100);
+    ctx.appendExchange(2, 'recent user two', 'recent assistant two', 950_000);
+    await ctx.agent.injection.inject();
+    // Queue two replies; presumably the compaction request consumes the first one.
+    ctx.mockNextResponse({ type: 'text', text: 'Auto plan compacted summary.' });
+    ctx.mockNextResponse({ type: 'text', text: 'I can answer with the plan path.' });
+    await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Continue the plan' }] });
+    await ctx.untilTurnEnd();
+    // Call index 1 should be the post-compaction answer request; it must name the plan file.
+    expect(ctx.llmCalls).toHaveLength(2);
+    const answerTexts = ctx.llmCalls[1]?.history.map(messageText) ?? [];
+    expect(answerTexts.some((text) => text.includes(`Plan file: ${planFilePath}`))).toBe(true);
+    await ctx.expectResumeMatches();
+  });
+
   it('does not auto compact small contexts when reserved size exceeds the model window', async () => {
     const ctx = testAgent({
       initialConfig: {
@@ -1451,8 +1441,10 @@ describe('FullCompaction', () => {
 
     expect(ctx.llmCalls).toHaveLength(2);
     const [compactionCall, answerCall] = ctx.llmCalls;
-    expect(messageText(compactionCall?.history.at(-1))).toContain('<!-- Compression Priorities');
-    expect(answerCall?.history.map(messageText)).toContain('Reserved compacted summary.');
+    expect(messageText(compactionCall?.history.at(-1))).toContain('CONTEXT CHECKPOINT COMPACTION');
+    expect(
+      answerCall?.history.map(messageText).some((text) => text.includes('Reserved compacted summary.')),
+    ).toBe(true);
     await ctx.expectResumeMatches();
   });
 
@@ -1476,10 +1468,21 @@ describe('FullCompaction', () => {
     expect(ctx.llmCalls).toHaveLength(2);
     const [compactionCall, answerCall] = ctx.llmCalls;
     const compactionTexts = compactionCall?.history.map(messageText) ?? [];
-    expect(compactionTexts.some((text) => text.includes('keep-this-pending-verbatim'))).toBe(false);
-    expect(compactionCall?.history.map((message) => message.role)).toEqual(['user', 'assistant', 'user']);
-    expect(answerCall?.history.map(messageText)).toContain('Oversized prompt summary.');
-    expect(messageText(answerCall?.history.at(-1))).toBe(oversizedPrompt);
+    // The whole history is compacted, so the pending prompt is included in the
+    // compaction input and kept verbatim in the post-compaction replacement.
+    expect(compactionTexts.some((text) => text.includes('keep-this-pending-verbatim'))).toBe(true);
+    expect(compactionCall?.history.map((message) => message.role)).toEqual([
+      'user',
+      'assistant',
+      'user',
+      'user',
+    ]);
+    expect(
+      answerCall?.history.map(messageText).some((text) => text.includes('Oversized prompt summary.')),
+    ).toBe(true);
+    expect(
+      answerCall?.history.map(messageText).some((text) => text.includes('keep-this-pending-verbatim')),
+    ).toBe(true);
     await ctx.expectResumeMatches();
   });
 
@@ -1492,6 +1495,8 @@ describe('FullCompaction', () => {
         max_context_tokens: 1_000_000,
       },
     });
+    // The auto-compact ratio is 0.85, so the context alone (840k) sits below
+    // the 850k threshold and the pending prompt pushes it over.
     ctx.appendExchange(1, 'old user one', 'old assistant one', 840_000);
     const pendingPrompt = `ratio-pending-verbatim:${'x'.repeat(60_000)}`;
 
@@ -1503,10 +1508,21 @@ describe('FullCompaction', () => {
     expect(ctx.llmCalls).toHaveLength(2);
     const [compactionCall, answerCall] = ctx.llmCalls;
     const compactionTexts = compactionCall?.history.map(messageText) ?? [];
-    expect(compactionTexts.some((text) => text.includes('ratio-pending-verbatim'))).toBe(false);
-    expect(compactionCall?.history.map((message) => message.role)).toEqual(['user', 'assistant', 'user']);
-    expect(answerCall?.history.map(messageText)).toContain('Ratio compacted summary.');
-    expect(messageText(answerCall?.history.at(-1))).toBe(pendingPrompt);
+    // The whole history is compacted, so the pending prompt is included in the
+    // compaction input and kept verbatim in the post-compaction replacement.
+    expect(compactionTexts.some((text) => text.includes('ratio-pending-verbatim'))).toBe(true);
+    expect(compactionCall?.history.map((message) => message.role)).toEqual([
+      'user',
+      'assistant',
+      'user',
+      'user',
+    ]);
+    expect(
+      answerCall?.history.map(messageText).some((text) => text.includes('Ratio compacted summary.')),
+    ).toBe(true);
+    expect(
+      answerCall?.history.map(messageText).some((text) => text.includes('ratio-pending-verbatim')),
+    ).toBe(true);
 
     await ctx.expectResumeMatches();
   });
@@ -1554,8 +1570,8 @@ describe('FullCompaction', () => {
       expect.objectContaining({
         event: 'context.apply_compaction',
         args: expect.objectContaining({
-          summary: 'Overflow compacted summary.',
-          compactedCount: 2,
+          summary: expect.stringContaining('Overflow compacted summary.'),
+          compactedCount: 4,
         }),
       }),
     );
@@ -1575,47 +1591,137 @@ describe('FullCompaction', () => {
         [
           "user: old user one",
           "assistant: old assistant one",
+          "user: Retry after provider overflow",
           "user: <compaction-instruction>",
         ],
         [
-          "assistant: Overflow compacted summary.",
-          "user: Retry after provider overflow",
+          "user: old user one
+
+      Retry after provider overflow",
+          "user: Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:
+      Overflow compacted summary.",
         ],
       ]
     `);
     await ctx.expectResumeMatches();
   });
 
-  it('uses observed max from overflow to size compaction input', async () => {
-    const ctx = testAgent();
+  it('stops repeated provider-overflow compactions when the compacted context still overflows', async () => {
+    let callCount = 0;
+    const generate: GenerateFn = async (_provider, _system, _tools, history) => {
+      callCount += 1;
+      if (messageText(history.at(-1)).includes('CONTEXT CHECKPOINT COMPACTION')) {
+        return textResult(`Still too large summary ${String(callCount)}.`);
+      }
+      throw new APIContextOverflowError(400, 'Context length exceeded', `req-overflow-${String(callCount)}`);
+    };
+    const ctx = testAgent({ generate });
     ctx.configure({
       provider: CATALOGUED_PROVIDER,
-      modelCapabilities: {
-        ...CATALOGUED_MODEL_CAPABILITIES,
-        max_context_tokens: 1_000_000,
-      },
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
+
+    await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Retry until overflow guard' }] });
+    const events = await ctx.untilTurnEnd();
+
+    expect(countEvents(events, 'compaction.started')).toBe(3);
+    expect(callCount).toBe(7);
+    expect(events).toContainEqual(
+      expect.objectContaining({
+        event: 'turn.ended',
+        args: expect.objectContaining({
+          reason: 'failed',
+          error: expect.objectContaining({
+            code: 'context.overflow',
+            message: 'Compaction failed to bring the context under the model window after 3 attempts.',
+          }),
+        }),
+      }),
+    );
+  });
+
+  it('does not leave an orphan tool result at the start when reducing overflowing compaction input', async () => {
+    const inputs: string[][] = [];
+    const generate: GenerateFn = async (_provider, _system, _tools, history) => {
+      inputs.push(inputHistorySnapshot(history));
+      if (inputs.length === 1) {
+        throw new APIContextOverflowError(400, 'Context length exceeded', 'req-compact-overflow');
+      }
+      return textResult('Reduced tool history summary.');
+    };
+    const ctx = testAgent({ generate });
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    ctx.appendToolExchange();
+    let applyRecord: { compactedCount?: number; droppedCount?: number } | undefined;
+    ctx.emitter.on('context.apply_compaction', (entry) => {
+      applyRecord = (entry as { args: { compactedCount?: number; droppedCount?: number } }).args;
     });
-    for (let i = 0; i < 20; i++) {
+    const compacted = ctx.once('context.apply_compaction');
+    const completed = ctx.once('compaction.completed');
+
+    await ctx.rpc.beginCompaction({});
+    await compacted;
+    await completed;
+
+    expect(inputs).toHaveLength(2);
+    const reducedHistory = inputs[1]!.slice(0, -1);
+    expect(reducedHistory[0]?.split(':', 1)[0]).not.toBe('tool');
+    // The whole 3-message history was folded (compactedCount), and all 3 were
+    // trimmed from the summarizer input on overflow (droppedCount), so the
+    // record honestly reports that the summary covers none of them.
+    expect(applyRecord?.compactedCount).toBe(3);
+    expect(applyRecord?.droppedCount).toBe(3);
+    await ctx.expectResumeMatches();
+  });
+
+  it('shrinks overflowing compaction input aggressively instead of one message at a time', async () => {
+    const inputs: string[][] = [];
+    let applyRecord: { compactedCount?: number; droppedCount?: number } | undefined;
+    const generate: GenerateFn = async (_provider, _system, _tools, history) => {
+      inputs.push(inputHistorySnapshot(history));
+      const compactedHistory = history.slice(0, -1);
+      if (compactedHistory.length > 20) {
+        throw new APIContextOverflowError(
+          400,
+          'Context length exceeded',
+          `req-long-compact-${String(inputs.length)}`,
+        );
+      }
+      return textResult('Aggressively reduced summary.');
+    };
+    const ctx = testAgent({ generate });
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    for (let i = 0; i < 30; i++) {
       ctx.appendExchange(
-        i + 1,
-        `old user ${String(i)}`,
-        `old assistant ${String(i)} ${'x'.repeat(40_000)}`,
-        20_000,
+        i,
+        `old user ${String(i)} ${'u'.repeat(400)}`,
+        `old assistant ${String(i)} ${'a'.repeat(400)}`,
+        10,
       );
     }
-    ctx.agent.fullCompaction.observeContextOverflow(200_000);
+    ctx.emitter.on('context.apply_compaction', (entry) => {
+      applyRecord = (entry as { args: { compactedCount?: number; droppedCount?: number } }).args;
+    });
     const compacted = ctx.once('context.apply_compaction');
     const completed = ctx.once('compaction.completed');
 
-    ctx.mockNextResponse({ type: 'text', text: 'Observed max summary.' });
     await ctx.rpc.beginCompaction({});
     await compacted;
     await completed;
 
-    expect(ctx.agent.fullCompaction.getEffectiveMaxContextTokens()).toBe(170_000);
-    const compactionTokens = estimateTokensForMessages(ctx.llmCalls[0]?.history ?? []);
-    expect(compactionTokens).toBeLessThan(200_000);
-    expect(ctx.compactHistory()[0]).toEqual({ role: 'assistant', text: 'Observed max summary.' });
+    expect(inputs[0]?.length).toBeGreaterThan(50);
+    expect(inputs.length).toBeLessThanOrEqual(4);
+    const finalCompactedHistory = inputs.at(-1)!.slice(0, -1);
+    expect(finalCompactedHistory[0]?.split(':', 1)[0]).not.toBe('tool');
+    expect(applyRecord?.compactedCount).toBe(60);
+    expect(applyRecord?.droppedCount).toBeGreaterThan(0);
     await ctx.expectResumeMatches();
   });
 
@@ -1794,8 +1900,8 @@ describe('FullCompaction', () => {
       expect.objectContaining({
         event: 'context.apply_compaction',
         args: expect.objectContaining({
-          summary: 'Unknown window compacted summary.',
-          compactedCount: 2,
+          summary: expect.stringContaining('Unknown window compacted summary.'),
+          compactedCount: 4,
         }),
       }),
     );
@@ -2000,8 +2106,8 @@ describe('FullCompaction', () => {
       expect.objectContaining({
         event: 'context.apply_compaction',
         args: expect.objectContaining({
-          summary: 'Placeholder compacted summary.',
-          compactedCount: 2,
+          summary: expect.stringContaining('Placeholder compacted summary.'),
+          compactedCount: 4,
         }),
       }),
     );
@@ -2028,12 +2134,12 @@ describe('FullCompaction', () => {
       [wire] full_compaction.begin       { "source": "auto", "time": "<time>" }
       [emit] compaction.started          { "trigger": "auto" }
       [emit] compaction.blocked          { "turnId": 0 }
-      [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 482, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 0, "maxContextTokens": 1000000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 482, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 482, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
-      [wire] context.apply_compaction    { "summary": "First compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 6, "time": "<time>" }
-      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 6, "maxContextTokens": 1000000, "contextUsage": 0.000006, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 482, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 482, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 461, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "session", "time": "<time>" }
+      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 0, "maxContextTokens": 1000000, "contextUsage": 0, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 461, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 461, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] context.apply_compaction    { "summary": "First compacted summary.", "contextSummary": "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nFirst compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 114, "keptUserMessageCount": 1, "time": "<time>" }
+      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 114, "maxContextTokens": 1000000, "contextUsage": 0.000114, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 461, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 461, "output": 9, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [wire] full_compaction.complete    { "time": "<time>" }
-      [emit] compaction.completed        { "result": { "summary": "First compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 6 } }
+      [emit] compaction.completed        { "result": { "summary": "First compacted summary.", "compactedCount": 1, "tokensBefore": 8, "tokensAfter": 114, "keptUserMessageCount": 1 } }
       [wire] context.append_loop_event   { "event": { "type": "step.begin", "uuid": "<uuid-1>", "turnId": "0", "step": 1 }, "time": "<time>" }
       [emit] turn.step.started           { "turnId": 0, "step": 1, "stepId": "<uuid-1>" }
       [emit] assistant.delta             { "turnId": 0, "delta": "I need a tool." }
@@ -2043,10 +2149,10 @@ describe('FullCompaction', () => {
       [emit] tool.call.started           { "turnId": 0, "toolCallId": "call_missing", "name": "MissingTool", "args": {} }
       [wire] context.append_loop_event   { "event": { "type": "tool.result", "parentUuid": "call_missing", "toolCallId": "call_missing", "result": { "output": "Tool \\"MissingTool\\" not found", "isError": true } }, "time": "<time>" }
       [emit] tool.result                 { "turnId": 0, "toolCallId": "call_missing", "output": "Tool \\"MissingTool\\" not found", "isError": true }
-      [wire] context.append_loop_event   { "event": { "type": "step.end", "uuid": "<uuid-1>", "turnId": "0", "step": 1, "usage": { "inputOther": 9, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "tool_use" }, "time": "<time>" }
-      [emit] turn.step.completed         { "turnId": 0, "step": 1, "stepId": "<uuid-1>", "usage": { "inputOther": 9, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "tool_use" }
-      [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 9, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "turn", "time": "<time>" }
-      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 20, "maxContextTokens": 1000000, "contextUsage": 0.00002, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 491, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 491, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 9, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
+      [wire] context.append_loop_event   { "event": { "type": "step.end", "uuid": "<uuid-1>", "turnId": "0", "step": 1, "usage": { "inputOther": 115, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "tool_use" }, "time": "<time>" }
+      [emit] turn.step.completed         { "turnId": 0, "step": 1, "stepId": "<uuid-1>", "usage": { "inputOther": 115, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "finishReason": "tool_use" }
+      [wire] usage.record                { "model": "mock-model", "usage": { "inputOther": 115, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 }, "usageScope": "turn", "time": "<time>" }
+      [emit] agent.status.updated        { "model": "mock-model", "contextTokens": 126, "maxContextTokens": 1000000, "contextUsage": 0.000126, "planMode": false, "swarmMode": false, "permission": "manual", "usage": { "byModel": { "mock-model": { "inputOther": 576, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 } }, "total": { "inputOther": 576, "output": 20, "inputCacheRead": 0, "inputCacheCreation": 0 }, "currentTurn": { "inputOther": 115, "output": 11, "inputCacheRead": 0, "inputCacheCreation": 0 } } }
       [emit] turn.step.interrupted       { "turnId": 0, "step": 2, "reason": "error", "message": "Compaction limit exceeded (1)" }
       [emit] turn.ended                  { "turnId": 0, "reason": "failed", "error": { "code": "context.overflow", "message": "Compaction limit exceeded (1)", "name": "KimiError", "details": { "maxCompactions": 1, "turnId": 0 }, "retryable": true } }
     `);
@@ -2059,49 +2165,16 @@ describe('FullCompaction', () => {
         tools: []
         messages:
           user: text "Trigger repeated compaction"
-          user: text <compaction-instruction>
+          user: text "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.\\n\\n--- This message is a direct task, not part of the above conversation ---\\n\\nYou are now given a task to compact this conversation context according to the priorities and output requirements below.\\n\\nThe goal of compaction is to keep essential code patterns, technical details, and architectural decisions for continuing development without losing context after the above messages are cleared.\\n\\nCompression priorities, in order:\\n\\n1. Current Task State: what is being worked on right now\\n2. Errors & Solutions: unresolved or recurring errors and their resolutions\\n3. Code Evolution: final working versions only; remove intermediate attempts\\n4. System Context: project structure, dependencies, environment setup\\n5. Design Decisions: architectural choices and their rationale\\n6. TODO Items: unfinished tasks and known issues\\n\\nRequired output structure:\\n\\n## Current Focus\\n\\n[What we're working on now]\\n\\n## Environment\\n\\n- [Key setup/config points]\\n- ...\\n\\n## Completed Tasks\\n\\n- [Task]: [Brief outcome]\\n- ...\\n\\n## Active Issues\\n\\n- [Issue]: [Status/Next steps]\\n- ...\\n\\n## Code State\\n\\n### [Critical file name]\\n\\n[Brief description of the file's purpose and current state]\\n\\n\`\`\`\\n[The latest version of critical code snippets in this file, <20 lines]\\n\`\`\`\\n\\n### [Critical file name]\\n\\n- [Useful classes/methods/functions]: [Brief description/usage]\\n- ...\\n\\nOmit non-critical code, intermediate attempts, and resolved errors.\\n\\n## Important Context\\n\\n- [Any crucial information not covered above]\\n- ...\\n\\nBe concise, structured, and focused on helping the next LLM seamlessly continue the work.\\n\\nRespond with text only. Do not call any tools — you already have everything you need in the conversation history."
 
       call 2:
         messages:
-          assistant: text "First compacted summary."
+          user: text "Trigger repeated compaction"
+          user: text "Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:\\nFirst compacted summary."
     `);
     await ctx.expectResumeMatches();
   });
 
-  it('appends the todo list to the compaction summary', async () => {
-    const ctx = testAgent();
-    ctx.configure({
-      provider: CATALOGUED_PROVIDER,
-      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
-    });
-    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
-    ctx.appendExchange(2, 'recent user two', 'recent assistant two', 80);
-
-    ctx.agent.tools.updateStore('todo', [
-      { title: 'Fix the auth bug', status: 'in_progress' },
-      { title: 'Add tests', status: 'pending' },
-    ]);
-
-    const compacted = new Promise<void>((resolve) => {
-      ctx.emitter.once('context.apply_compaction', () => {
-        resolve();
-      });
-    });
-    const completed = ctx.once('compaction.completed');
-
-    ctx.mockNextResponse({ type: 'text', text: 'Compacted summary.' });
-    await ctx.rpc.beginCompaction({});
-    await compacted;
-    await completed;
-
-    const history = ctx.compactHistory();
-    expect(history).toHaveLength(1);
-    expect(history[0]).toMatchObject({
-      role: 'assistant',
-      text: 'Compacted summary.\n\n## TODO List\n  [in_progress] Fix the auth bug\n  [pending] Add tests',
-    });
-    await ctx.expectResumeMatches();
-  });
 });
 
 afterEach(() => {
@@ -2244,10 +2317,9 @@ function realKosongGenerate(
 const alwaysCompactOnce: CompactionStrategy = {
   shouldCompact: () => true,
   shouldBlock: () => true,
-  computeCompactCount: (messages: readonly Message[]) => messages.length,
-  reduceCompactOnOverflow: (messages: readonly Message[]) => messages.length,
   checkAfterStep: true,
   maxCompactionPerTurn: 1,
+  maxOverflowCompactionAttempts: 3,
 };
 
 function missingToolCall(): ToolCall {
@@ -2259,29 +2331,13 @@ function missingToolCall(): ToolCall {
   };
 }
 
-function testCompactionStrategy(maxSize: number = 1_000): DefaultCompactionStrategy {
-  return new DefaultCompactionStrategy(() => maxSize, {
-    triggerRatio: 0.85,
-    blockRatio: 0.85,
-    reservedContextSize: 0,
-    maxCompactionPerTurn: 3,
-    maxRecentMessages: 10,
-    maxRecentUserMessages: Infinity,
-    maxRecentSizeRatio: 0.2,
-    minOverflowReductionRatio: 0.05,
-  });
-}
-
 function overflowOnlyCompactionStrategy(maxSize: number = 14): DefaultCompactionStrategy {
   return new DefaultCompactionStrategy(() => maxSize, {
     triggerRatio: Infinity,
     blockRatio: Infinity,
     reservedContextSize: 0,
     maxCompactionPerTurn: 3,
-    maxRecentMessages: 3,
-    maxRecentUserMessages: Infinity,
-    maxRecentSizeRatio: 0.2,
-    minOverflowReductionRatio: 0.05,
+    maxOverflowCompactionAttempts: 3,
   });
 }
 
@@ -2331,5 +2387,5 @@ function inputHistorySnapshot(history: readonly Message[]): string[] {
 }
 
 function normalizeInputText(text: string): string {
-  return text.includes('compact this conversation context') ? '<compaction-instruction>' : text;
+  return text.includes('CONTEXT CHECKPOINT COMPACTION') ? '<compaction-instruction>' : text;
 }
diff --git a/packages/agent-core/test/agent/compaction/memento.test.ts b/packages/agent-core/test/agent/compaction/memento.test.ts
new file mode 100644
index 000000000..912f247a1
--- /dev/null
+++ b/packages/agent-core/test/agent/compaction/memento.test.ts
@@ -0,0 +1,248 @@
+import type { Message } from '@moonshot-ai/kosong';
+import { describe, expect, it } from 'vitest';
+
+import {
+  COMPACTION_SUMMARY_PREFIX,
+  buildCompactionSummaryText,
+  collectCompactableUserMessages,
+  compactionUserMessageDisposition,
+  isCompactionSummaryMessage,
+  isRealUserInput,
+  selectRecentUserMessages,
+  type CompactionUserDisposition,
+} from '../../../src/agent/compaction';
+import type { PromptOrigin } from '../../../src/agent/context/types';
+import { estimateTokens, estimateTokensForMessage } from '../../../src/utils/tokens';
+
+function textMessage(role: 'user' | 'assistant' | 'tool', text: string): Message { // fixture: one message of `role`
+  return { role, content: [{ type: 'text', text }], toolCalls: [] }; // a single text part and no tool calls
+}
+
+function messageText(message: Message): string { // concatenates the text parts; other part types add nothing
+  return message.content.reduce((acc, part) => (part.type === 'text' ? acc + part.text : acc), '');
+}
+
+const ALL_PROMPT_ORIGIN_KINDS = { // its keys are iterated by the 'classifies every prompt origin kind' test
+  user: true,
+  skill_activation: true,
+  injection: true,
+  shell_command: true,
+  compaction_summary: true,
+  system_trigger: true,
+  background_task: true,
+  cron_job: true,
+  cron_missed: true,
+  hook_result: true,
+  retry: true,
+} satisfies Record<PromptOrigin['kind'], true>; // compile-time check: a missing or unknown kind is a type error
+
+const EXPECTED_DISPOSITION: Record<PromptOrigin['kind'], CompactionUserDisposition> = { // expected result per kind
+  user: 'keep',
+  skill_activation: 'keep', // for the 'user-slash' trigger that originForKind builds
+  injection: 'drop',
+  shell_command: 'drop',
+  compaction_summary: 'drop',
+  system_trigger: 'drop',
+  background_task: 'drop',
+  cron_job: 'drop',
+  cron_missed: 'drop',
+  hook_result: 'drop',
+  retry: 'drop',
+};
+
+function originForKind(kind: PromptOrigin['kind']): PromptOrigin { // builds one sample origin for `kind`
+  switch (kind) { // no default branch: each listed kind returns from its own case
+    case 'user':
+      return { kind: 'user' };
+    case 'skill_activation':
+      return {
+        kind: 'skill_activation',
+        activationId: 'activation',
+        skillName: 'skill',
+        trigger: 'user-slash', // the 'model-tool' trigger is covered by its own test and expects 'drop'
+      };
+    case 'injection':
+      return { kind: 'injection', variant: 'system_reminder' };
+    case 'shell_command':
+      return { kind: 'shell_command', phase: 'input' };
+    case 'compaction_summary':
+      return { kind: 'compaction_summary' };
+    case 'system_trigger':
+      return { kind: 'system_trigger', name: 'system' };
+    case 'background_task':
+      return {
+        kind: 'background_task',
+        taskId: 'task',
+        status: 'completed',
+        notificationId: 'notification',
+      };
+    case 'cron_job':
+      return {
+        kind: 'cron_job',
+        jobId: 'job',
+        cron: '* * * * *',
+        recurring: true,
+        coalescedCount: 1,
+        stale: false,
+      };
+    case 'cron_missed':
+      return { kind: 'cron_missed', count: 1 };
+    case 'hook_result':
+      return { kind: 'hook_result', event: 'PreCompact' };
+    case 'retry':
+      return { kind: 'retry', trigger: 'system' };
+  }
+}
+
+describe('isCompactionSummaryMessage', () => { // the cases below vary origin and text independently
+  it('detects the compaction origin', () => {
+    const message = {
+      ...textMessage('user', 'anything'),
+      origin: { kind: 'compaction_summary' as const }, // arbitrary text; only the origin marks it as a summary
+    };
+    expect(isCompactionSummaryMessage(message)).toBe(true);
+  });
+
+  it('keeps real user prompts even when they start with the summary prefix', () => {
+    const message = {
+      ...textMessage('user', `${COMPACTION_SUMMARY_PREFIX}\nsummary`), // text looks like a summary...
+      origin: { kind: 'user' as const }, // ...but the origin says it is a user prompt
+    };
+
+    expect(isCompactionSummaryMessage(message)).toBe(false);
+    expect(collectCompactableUserMessages([message])).toEqual([message]); // and it is still collected
+  });
+
+  it('ignores ordinary user messages', () => {
+    expect(isCompactionSummaryMessage(textMessage('user', 'hello'))).toBe(false); // message has no origin field
+  });
+});
+
+describe('compactionUserMessageDisposition', () => { // table-driven check plus one trigger-specific case
+  it('classifies every prompt origin kind', () => {
+    for (const kind of Object.keys(ALL_PROMPT_ORIGIN_KINDS) as Array<PromptOrigin['kind']>) { // kind doubles as the assertion message
+      expect(compactionUserMessageDisposition(originForKind(kind)), kind).toBe(EXPECTED_DISPOSITION[kind]);
+    }
+  });
+
+  it('drops model-triggered skill activations', () => {
+    expect(
+      compactionUserMessageDisposition({
+        kind: 'skill_activation',
+        activationId: 'activation',
+        skillName: 'skill',
+        trigger: 'model-tool', // same kind as the table entry, different trigger
+      }),
+    ).toBe('drop');
+  });
+});
+
+describe('isRealUserInput', () => { // same text every time; only the origin changes
+  it('keeps genuine user input and drops other origins', () => {
+    expect(isRealUserInput({ ...textMessage('user', 'hello'), origin: originForKind('user') })).toBe(
+      true,
+    );
+    expect(
+      isRealUserInput({ ...textMessage('user', 'hello'), origin: originForKind('skill_activation') }), // 'user-slash' trigger
+    ).toBe(true);
+    expect(
+      isRealUserInput({ ...textMessage('user', 'hello'), origin: originForKind('injection') }),
+    ).toBe(false);
+    expect(
+      isRealUserInput({ ...textMessage('user', 'hello'), origin: originForKind('shell_command') }),
+    ).toBe(false);
+    expect(
+      isRealUserInput({ ...textMessage('user', 'hello'), origin: originForKind('background_task') }),
+    ).toBe(false);
+  });
+});
+
+describe('collectCompactableUserMessages', () => { // results are compared by message text, in input order
+  it('keeps only user messages', () => {
+    const messages = [ // built by textMessage, so none of these carries an origin field
+      textMessage('user', 'u1'),
+      textMessage('assistant', 'a1'),
+      textMessage('tool', 't1'),
+      textMessage('user', 'u2'),
+    ];
+
+    expect(collectCompactableUserMessages(messages).map(messageText)).toEqual(['u1', 'u2']);
+  });
+
+  it('drops previous compaction summaries', () => {
+    const summary = {
+      ...textMessage('user', `${COMPACTION_SUMMARY_PREFIX}\nold summary`),
+      origin: { kind: 'compaction_summary' as const }, // user-role message tagged as an earlier summary
+    };
+    const messages = [textMessage('user', 'u1'), summary, textMessage('user', 'u2')];
+
+    expect(collectCompactableUserMessages(messages).map(messageText)).toEqual(['u1', 'u2']);
+  });
+});
+
+describe('selectRecentUserMessages', () => { // budgets are derived from the same token estimators the tests assert with
+  it('keeps the most recent messages within the budget', () => {
+    const messages = [
+      textMessage('user', 'old'),
+      textMessage('user', 'mid'),
+      textMessage('user', 'recent'),
+    ];
+    const budget = estimateTokensForMessage(messages[1]!) + estimateTokensForMessage(messages[2]!); // exactly 'mid' + 'recent'
+
+    expect(selectRecentUserMessages(messages, budget).map(messageText)).toEqual(['mid', 'recent']);
+  });
+
+  it('truncates the oldest kept message when it would overflow the budget', () => {
+    const long = 'x'.repeat(1_000);
+    const messages = [textMessage('user', long), textMessage('user', 'recent')];
+    const budget = estimateTokensForMessage(messages[1]!) + 10; // 10 tokens left over after 'recent'
+
+    const selected = selectRecentUserMessages(messages, budget);
+
+    expect(selected).toHaveLength(2);
+    expect(estimateTokens(messageText(selected[0]!))).toBeLessThanOrEqual(10);
+    expect(messageText(selected[1]!)).toBe('recent');
+  });
+
+  it('truncates a CJK-heavy oldest message within the budget in one pass', () => {
+    const cjk = '中'.repeat(40_000);
+    const messages = [textMessage('user', cjk), textMessage('user', 'recent')];
+    const budget = estimateTokensForMessage(messages[1]!) + 1_000; // 1_000 tokens left over after 'recent'
+
+    const selected = selectRecentUserMessages(messages, budget);
+
+    expect(selected).toHaveLength(2);
+    expect(messageText(selected[1]!)).toBe('recent');
+    expect(estimateTokens(messageText(selected[0]!))).toBeLessThanOrEqual(1_000);
+    expect(cjk.startsWith(messageText(selected[0]!))).toBe(true); // the kept text is a prefix of the original
+  });
+
+  it('does not split surrogate pairs while truncating emoji text', () => {
+    const emoji = '😀'.repeat(2_000);
+    const messages = [textMessage('user', emoji), textMessage('user', 'recent')];
+    const budget = estimateTokensForMessage(messages[1]!) + 333; // 333 tokens left over after 'recent'
+
+    const selected = selectRecentUserMessages(messages, budget);
+    const truncated = messageText(selected[0]!);
+
+    expect(selected).toHaveLength(2);
+    expect(messageText(selected[1]!)).toBe('recent');
+    expect(estimateTokens(truncated)).toBeLessThanOrEqual(333);
+    expect(/^(?:😀)*$/u.test(truncated)).toBe(true); // only whole emoji remain
+    expect(truncated.length % 2).toBe(0); // each '😀' is two UTF-16 code units, so an odd length means a cut pair
+  });
+
+  it('returns nothing when the budget is zero', () => {
+    expect(selectRecentUserMessages([textMessage('user', 'hi')], 0)).toEqual([]);
+  });
+});
+
+describe('buildCompactionSummaryText', () => { // expected shape: prefix, newline, then the summary body
+  it('prefixes the summary', () => {
+    expect(buildCompactionSummaryText('Summary.')).toBe(`${COMPACTION_SUMMARY_PREFIX}\nSummary.`);
+  });
+
+  it('falls back when the summary is empty', () => {
+    expect(buildCompactionSummaryText('   ')).toBe(`${COMPACTION_SUMMARY_PREFIX}\n(no summary available)`); // whitespace-only input
+  });
+});
diff --git a/packages/agent-core/test/agent/compaction/micro.test.ts b/packages/agent-core/test/agent/compaction/micro.test.ts
index edc931aaa..ed1a6ccf6 100644
--- a/packages/agent-core/test/agent/compaction/micro.test.ts
+++ b/packages/agent-core/test/agent/compaction/micro.test.ts
@@ -700,10 +700,10 @@ describe('MicroCompaction', () => {
     await ctx.rpc.beginCompaction({});
     await compacted;
 
-    expect(ctx.agent.context.messages).toHaveLength(1);
-    expect(ctx.agent.context.messages[0]).toMatchObject({
-      role: 'assistant',
-      content: [{ type: 'text', text: 'Summary.' }],
+    expect(ctx.agent.context.messages).toHaveLength(2);
+    expect(ctx.agent.context.messages[1]).toMatchObject({
+      role: 'user',
+      content: [{ type: 'text', text: expect.stringContaining('Summary.') }],
     });
   });
 
diff --git a/packages/agent-core/test/agent/compaction/strategy.test.ts b/packages/agent-core/test/agent/compaction/strategy.test.ts
index ebc4c7cdd..84422eb4e 100644
--- a/packages/agent-core/test/agent/compaction/strategy.test.ts
+++ b/packages/agent-core/test/agent/compaction/strategy.test.ts
@@ -1,155 +1,80 @@
-
-import {
-  type Message
-} from '@moonshot-ai/kosong';
 import { describe, expect, it } from 'vitest';
 
-import { DefaultCompactionStrategy } from '../../../src/agent/compaction';
-import { estimateTokensForMessages } from '../../../src/utils/tokens';
+import {
+  DEFAULT_COMPACTION_CONFIG,
+  DefaultCompactionStrategy,
+} from '../../../src/agent/compaction';
 
 describe('DefaultCompactionStrategy', () => {
-  it('keeps an oversized trailing user message as recent', () => {
-    const strategy = testCompactionStrategy();
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', `pending user ${'x'.repeat(1_200)}`),
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('keeps consecutive trailing user messages as recent', () => {
-    const strategy = testCompactionStrategy();
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', `pending user one ${'x'.repeat(1_200)}`),
-      textMessage('user', `pending user two ${'x'.repeat(1_200)}`),
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('compacts the prefix when the trailing exchange itself is oversized', () => {
-    const strategy = testCompactionStrategy();
-    const messages = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', 'recent user'),
-      textMessage('assistant', `recent assistant ${'x'.repeat(1_200)}`),
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('returns 0 when there is nothing to compact', () => {
-    const strategy = testCompactionStrategy();
-    expect(strategy.computeCompactCount([], 'auto')).toBe(0);
-    expect(strategy.computeCompactCount([textMessage('user', 'only pending')], 'auto')).toBe(0);
-    expect(
-      strategy.computeCompactCount(
-        [
-          textMessage('user', 'a'),
-          textMessage('user', 'b'),
-          textMessage('user', 'c'),
-        ],
-        'auto',
-      ),
-    ).toBe(0);
-  });
-
-  it('returns 0 when no intermediate split exists and the last message is also unsplittable', () => {
-    const strategy = testCompactionStrategy();
-    const messages: Message[] = [
-      textMessage('user', 'inspect'),
-      {
-        role: 'assistant',
-        content: [],
-        toolCalls: [{ type: 'function', id: 'call_a', name: 'Lookup', arguments: '{}' }],
-      },
-    ];
-
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(0);
-  });
-
-  it('does not split inside a parallel tool exchange', () => {
-    const strategy = testCompactionStrategy();
-    const messages: Message[] = [
-      textMessage('user', 'old user'),
-      textMessage('assistant', 'old assistant'),
-      textMessage('user', 'run both tools'),
-      {
-        role: 'assistant',
-        content: [],
-        toolCalls: [
-          { type: 'function', id: 'call_a', name: 'Lookup', arguments: '{}' },
-          { type: 'function', id: 'call_b', name: 'Lookup', arguments: '{}' },
-        ],
-      },
-      { role: 'tool', content: [{ type: 'text', text: 'a' }], toolCalls: [], toolCallId: 'call_a' },
-      { role: 'tool', content: [{ type: 'text', text: 'b' }], toolCalls: [], toolCallId: 'call_b' },
-      textMessage('user', 'next prompt'),
-    ];
-
-    // The only valid split is before the parallel exchange (after 'old assistant'),
-    // never between tool_a and tool_b — that would leave tool_b as an orphan.
-    expect(strategy.computeCompactCount(messages, 'auto')).toBe(2);
-  });
-
-  it('shrinks auto compaction input to fit the model window', () => {
-    const maxSize = 1_000;
-    const strategy = testCompactionStrategy(maxSize);
-    const messages = Array.from({ length: 30 }, (_, i) =>
-      textMessage('assistant', `message ${i} ${'x'.repeat(400)}`),
-    );
-
-    const count = strategy.computeCompactCount(messages, 'auto');
+  it('triggers auto-compaction at 85% of the context window', () => {
+    const strategy = new DefaultCompactionStrategy(() => 100_000, {
+      ...DEFAULT_COMPACTION_CONFIG,
+      reservedContextSize: 0,
+    });
 
-    expect(count).toBeGreaterThan(0);
-    expect(count).toBeLessThan(messages.length);
-    expect(estimateTokensForMessages(messages.slice(0, count))).toBeLessThanOrEqual(maxSize);
-    expect(estimateTokensForMessages(messages.slice(0, count + 1))).toBeGreaterThan(maxSize);
+    expect(strategy.shouldCompact(84_999)).toBe(false);
+    expect(strategy.shouldCompact(85_000)).toBe(true);
+    expect(strategy.shouldCompact(100_000)).toBe(true);
   });
 
-  it('shrinks manual compaction input to fit the model window', () => {
-    const maxSize = 1_000;
-    const strategy = testCompactionStrategy(maxSize);
-    const messages = Array.from({ length: 30 }, (_, i) =>
-      textMessage('assistant', `message ${i} ${'x'.repeat(400)}`),
-    );
-
-    const count = strategy.computeCompactCount(messages, 'manual');
+  it('blocks at the same threshold by default (synchronous compaction)', () => {
+    const strategy = new DefaultCompactionStrategy(() => 100_000, {
+      ...DEFAULT_COMPACTION_CONFIG,
+      reservedContextSize: 0,
+    });
 
-    expect(count).toBeGreaterThan(0);
-    expect(count).toBeLessThan(messages.length);
-    expect(estimateTokensForMessages(messages.slice(0, count))).toBeLessThanOrEqual(maxSize);
-    expect(estimateTokensForMessages(messages.slice(0, count + 1))).toBeGreaterThan(maxSize);
+    expect(strategy.shouldBlock(84_999)).toBe(false);
+    expect(strategy.shouldBlock(85_000)).toBe(true);
+    expect(strategy.checkAfterStep).toBe(false);
   });
 
-  it('reserves response context by default before the ratio threshold is reached', () => {
+  it('reserves response context before the ratio threshold is reached', () => {
     const strategy = new DefaultCompactionStrategy(() => 256_000);
 
+    // Ratio threshold is 256_000 * 0.85 = 217_600; the 50_000 reserve triggers earlier, at 256_000 - 50_000 = 206_000.
     expect(strategy.shouldCompact(210_000)).toBe(true);
     expect(strategy.shouldBlock(210_000)).toBe(true);
   });
 
   it('ignores reserved context when the reserve is not smaller than the model window', () => {
     const strategy = new DefaultCompactionStrategy(() => 32_000, {
-      triggerRatio: 0.85,
-      blockRatio: 0.85,
+      triggerRatio: 0.9,
+      blockRatio: 0.9,
       reservedContextSize: 50_000,
       maxCompactionPerTurn: 3,
-      maxRecentMessages: 3,
-      maxRecentUserMessages: Infinity,
-      maxRecentSizeRatio: 0.2,
-      minOverflowReductionRatio: 0.05,
+      maxOverflowCompactionAttempts: 3,
     });
 
     expect(strategy.shouldCompact(1)).toBe(false);
     expect(strategy.shouldBlock(1)).toBe(false);
-    expect(strategy.shouldCompact(28_000)).toBe(true);
-    expect(strategy.shouldBlock(28_000)).toBe(true);
+    // Falls back to the 90% ratio: 32_000 * 0.9 = 28_800.
+    expect(strategy.shouldCompact(28_800)).toBe(true);
+    expect(strategy.shouldBlock(28_800)).toBe(true);
+  });
+
+  it('does not compact when the context window is unknown', () => {
+    const strategy = new DefaultCompactionStrategy(() => 0);
+
+    expect(strategy.shouldCompact(1_000_000)).toBe(false);
+    expect(strategy.shouldBlock(1_000_000)).toBe(false);
+  });
+
+  it('enables after-step checks only when ratios differ (async compaction)', () => {
+    const strategy = new DefaultCompactionStrategy(() => 100_000, {
+      triggerRatio: 0.8,
+      blockRatio: 0.9,
+      reservedContextSize: 0,
+      maxCompactionPerTurn: 3,
+      maxOverflowCompactionAttempts: 3,
+    });
+
+    expect(strategy.checkAfterStep).toBe(true);
+  });
+
+  it('exposes maxCompactionPerTurn', () => {
+    const strategy = testCompactionStrategy();
+
+    expect(strategy.maxCompactionPerTurn).toBe(3);
   });
 });
 
@@ -159,30 +84,6 @@ function testCompactionStrategy(maxSize: number = 1_000): DefaultCompactionStrat
     blockRatio: 0.85,
     reservedContextSize: 0,
     maxCompactionPerTurn: 3,
-    maxRecentMessages: 10,
-    maxRecentUserMessages: Infinity,
-    maxRecentSizeRatio: 0.2,
-    minOverflowReductionRatio: 0.05,
+    maxOverflowCompactionAttempts: 3,
   });
 }
-
-function overflowOnlyCompactionStrategy(maxSize: number = 14): DefaultCompactionStrategy {
-  return new DefaultCompactionStrategy(() => maxSize, {
-    triggerRatio: Infinity,
-    blockRatio: Infinity,
-    reservedContextSize: 0,
-    maxCompactionPerTurn: 3,
-    maxRecentMessages: 3,
-    maxRecentUserMessages: Infinity,
-    maxRecentSizeRatio: 0.2,
-    minOverflowReductionRatio: 0.05,
-  });
-}
-
-function textMessage(role: 'user' | 'assistant', text: string): Message {
-  return {
-    role,
-    content: [{ type: 'text', text }],
-    toolCalls: [],
-  };
-}
diff --git a/packages/agent-core/test/agent/context.test.ts b/packages/agent-core/test/agent/context.test.ts
index 580bda69c..d67fc0f6b 100644
--- a/packages/agent-core/test/agent/context.test.ts
+++ b/packages/agent-core/test/agent/context.test.ts
@@ -563,7 +563,7 @@ describe('Agent context', () => {
     await ctx.expectResumeMatches();
   });
 
-  it('preserves deferred reminders when compaction keeps a pending tool exchange', async () => {
+  it('drops deferred reminders when compaction drops a pending tool exchange', async () => {
     const ctx = testAgent();
     ctx.configure();
 
@@ -576,20 +576,24 @@ describe('Agent context', () => {
     });
     ctx.agent.context.applyCompaction({
       summary: 'summary of old prompt',
-      compactedCount: 1,
+      compactedCount: 4,
       tokensBefore: 100,
-      tokensAfter: 40,
     });
     ctx.agent.context.appendSystemReminder('second reminder', {
       kind: 'injection',
       variant: 'host',
     });
 
+    // Compaction keeps only the real user prompt plus the summary; the deferred
+    // first reminder is dropped because initial context is rebuilt every turn.
+    // The second reminder, appended after compaction, is preserved.
     expect(ctx.agent.context.messages.map((message) => message.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
+      'user',
+      'user',
+    ]);
+    expect(ctx.agent.context.messages[2]?.content).toEqual([
+      { type: 'text', text: '<system-reminder>\nsecond reminder\n</system-reminder>' },
     ]);
 
     ctx.dispatch({
@@ -602,24 +606,47 @@ describe('Agent context', () => {
       },
     });
 
+    // The pending tool exchange was dropped by compaction, so the late tool
+    // result is ignored and the history is unchanged.
     expect(ctx.agent.context.messages.map((message) => message.role)).toEqual([
-      'assistant',
       'user',
-      'assistant',
-      'tool',
-      'tool',
       'user',
       'user',
     ]);
-    expect(ctx.agent.context.messages[5]?.content).toEqual([
-      { type: 'text', text: '<system-reminder>\nfirst reminder\n</system-reminder>' },
-    ]);
-    expect(ctx.agent.context.messages[6]?.content).toEqual([
-      { type: 'text', text: '<system-reminder>\nsecond reminder\n</system-reminder>' },
-    ]);
     await ctx.expectResumeMatches();
   });
 
+  it('applyCompaction keeps only real user input from mixed user-role history', () => {
+    const ctx = testAgent();
+    ctx.configure();
+
+    ctx.agent.context.appendUserMessage([{ type: 'text', text: 'real prompt' }]);
+    ctx.agent.context.appendBashInput('pwd');
+    ctx.agent.context.appendBashOutput('/tmp/repo', '', false);
+    ctx.agent.context.appendLocalCommandStdout('local command output');
+    ctx.agent.context.appendSystemReminder('stale reminder', {
+      kind: 'injection',
+      variant: 'host',
+    });
+
+    const result = ctx.agent.context.applyCompaction({
+      summary: 'summary of mixed history',
+      compactedCount: 5,
+      tokensBefore: 100,
+    });
+    ctx.agent.context.appendSystemReminder('fresh reminder', {
+      kind: 'injection',
+      variant: 'host',
+    });
+
+    expect(ctx.agent.context.history.map(({ role, origin }) => ({ role, origin }))).toEqual([
+      { role: 'user', origin: { kind: 'user' } },
+      { role: 'user', origin: { kind: 'compaction_summary' } },
+      { role: 'user', origin: { kind: 'injection', variant: 'host' } },
+    ]);
+    expect(result.keptUserMessageCount).toBe(1);
+  });
+
   it('clears context before the next LLM request', async () => {
     const ctx = testAgent();
     ctx.configure();
@@ -648,9 +675,8 @@ describe('Agent context', () => {
       summary: 'summary of old context',
       compactedCount: 1,
       tokensBefore: 100,
-      tokensAfter: 20,
     });
-    expect(ctx.agent.context.history[0]?.origin).toEqual({ kind: 'compaction_summary' });
+    expect(ctx.agent.context.history.at(-1)?.origin).toEqual({ kind: 'compaction_summary' });
 
     ctx.mockNextResponse({ type: 'text', text: 'after compaction' });
     await ctx.rpc.prompt({ input: [{ type: 'text', text: 'new prompt' }] });
@@ -660,8 +686,9 @@ describe('Agent context', () => {
       system: <system-prompt>
       tools: []
       messages:
-        assistant: text "summary of old context"
-        user: text "recent user message\\n\\nnew prompt"
+        user: text "old user message\\n\\nrecent user message"
+        user: text "summary of old context"
+        user: text "new prompt"
     `);
     await ctx.expectResumeMatches();
   });
@@ -812,7 +839,6 @@ describe('Agent context', () => {
       summary: 'summary of compacted context',
       compactedCount: 1,
       tokensBefore: 100,
-      tokensAfter: 20,
     });
     ctx.agent.context.appendUserMessage([{ type: 'text', text: 'recent user message' }]);
     ctx.agent.context.appendMessage({
@@ -830,7 +856,11 @@ describe('Agent context', () => {
 
     expect(ctx.agent.context.history).toEqual([
       expect.objectContaining({
-        role: 'assistant',
+        role: 'user',
+        content: [{ type: 'text', text: 'old user message' }],
+      }),
+      expect.objectContaining({
+        role: 'user',
         origin: { kind: 'compaction_summary' },
         content: [{ type: 'text', text: 'summary of compacted context' }],
       }),
@@ -852,7 +882,6 @@ describe('Agent context', () => {
       summary: 'summary of compacted context',
       compactedCount: 1,
       tokensBefore: 100,
-      tokensAfter: 20,
     });
     ctx.agent.context.appendUserMessage([{ type: 'text', text: 'recent user message' }]);
     ctx.agent.context.appendMessage({
@@ -866,7 +895,11 @@ describe('Agent context', () => {
     }).not.toThrow();
     expect(ctx.agent.context.history).toEqual([
       expect.objectContaining({
-        role: 'assistant',
+        role: 'user',
+        content: [{ type: 'text', text: 'old user message' }],
+      }),
+      expect.objectContaining({
+        role: 'user',
         origin: { kind: 'compaction_summary' },
         content: [{ type: 'text', text: 'summary of compacted context' }],
       }),
diff --git a/packages/agent-core/test/agent/injection/manager.test.ts b/packages/agent-core/test/agent/injection/manager.test.ts
index a8a91ea93..e5c91740e 100644
--- a/packages/agent-core/test/agent/injection/manager.test.ts
+++ b/packages/agent-core/test/agent/injection/manager.test.ts
@@ -15,9 +15,9 @@ class RecordingInjector extends DynamicInjector {
     super.onContextClear();
   }
 
-  override onContextCompacted(compactedCount: number): void {
+  override onContextCompacted(): void {
     this.compactionCalls += 1;
-    super.onContextCompacted(compactedCount);
+    super.onContextCompacted();
   }
 
   protected override getInjection(): string | undefined {
@@ -28,7 +28,7 @@ class RecordingInjector extends DynamicInjector {
 class BoomInjector extends DynamicInjector {
   override readonly injectionVariant = 'boom_test';
 
-  override onContextCompacted(_compactedCount: number): void {
+  override onContextCompacted(): void {
     throw new Error('boom-compact');
   }
 
@@ -49,7 +49,7 @@ describe('InjectionManager.onContextCompacted', () => {
     const b = new RecordingInjector(ctx.agent);
     installInjectors(ctx.agent.injection, [a, b]);
 
-    ctx.agent.injection.onContextCompacted(3);
+    ctx.agent.injection.onContextCompacted();
 
     expect(a.compactionCalls).toBe(1);
     expect(b.compactionCalls).toBe(1);
@@ -62,7 +62,7 @@ describe('InjectionManager.onContextCompacted', () => {
     installInjectors(ctx.agent.injection, [new BoomInjector(ctx.agent), recorder]);
 
     expect(() => {
-      ctx.agent.injection.onContextCompacted(2);
+      ctx.agent.injection.onContextCompacted();
     }).not.toThrow();
     expect(recorder.compactionCalls).toBe(1);
   });
@@ -74,11 +74,11 @@ describe('InjectionManager.onContextCompacted', () => {
     installInjectors(ctx.agent.injection, [new BoomInjector(ctx.agent), recorder]);
 
     expect(() => {
-      ctx.agent.injection.onContextCompacted(1);
+      ctx.agent.injection.onContextCompacted();
     }).not.toThrow();
     expect(recorder.compactionCalls).toBe(1);
 
-    ctx.agent.injection.onContextCompacted(1);
+    ctx.agent.injection.onContextCompacted();
     expect(recorder.compactionCalls).toBe(2);
   });
 
diff --git a/packages/agent-core/test/agent/permission.test.ts b/packages/agent-core/test/agent/permission.test.ts
index 8d0b27712..3e3a75d6e 100644
--- a/packages/agent-core/test/agent/permission.test.ts
+++ b/packages/agent-core/test/agent/permission.test.ts
@@ -276,6 +276,46 @@ describe('Permission auto mode', () => {
     );
   });
 
+  it('reinjects the auto mode reminder after context compaction', async () => {
+    const appendSystemReminder = vi.fn();
+    const agent = {
+      permission: { mode: 'auto' },
+      context: { history: [], appendSystemReminder },
+    } as unknown as Agent;
+    const injector = new PermissionModeInjector(agent);
+
+    await injector.inject();
+    appendSystemReminder.mockClear();
+    injector.onContextCompacted();
+    await injector.inject();
+
+    expect(appendSystemReminder).toHaveBeenCalledWith(
+      expect.stringContaining('Do NOT call AskUserQuestion while auto mode is active'),
+      { kind: 'injection', variant: 'permission_mode' },
+    );
+  });
+
+  it('keeps the auto mode exit reminder after compaction if the mode changes', async () => {
+    const appendSystemReminder = vi.fn();
+    const permission = { mode: 'auto' as PermissionMode };
+    const agent = {
+      permission,
+      context: { history: [], appendSystemReminder },
+    } as unknown as Agent;
+    const injector = new PermissionModeInjector(agent);
+
+    await injector.inject();
+    appendSystemReminder.mockClear();
+    injector.onContextCompacted();
+    permission.mode = 'manual';
+    await injector.inject();
+
+    expect(appendSystemReminder).toHaveBeenCalledWith(
+      expect.stringContaining('Auto permission mode is no longer active'),
+      { kind: 'injection', variant: 'permission_mode' },
+    );
+  });
+
   it('blocks AskUserQuestion in auto mode before execution', async () => {
     const { manager, requestApproval } = makePermissionManager(async () => ({
       decision: 'approved',
diff --git a/packages/agent-core/test/agent/records/index.test.ts b/packages/agent-core/test/agent/records/index.test.ts
index 56ed53a1b..ce5c4edad 100644
--- a/packages/agent-core/test/agent/records/index.test.ts
+++ b/packages/agent-core/test/agent/records/index.test.ts
@@ -428,6 +428,7 @@ describe('agent replay range build', () => {
           compactedCount: 0,
           tokensBefore: 10,
           tokensAfter: 3,
+          keptUserMessageCount: 0,
         },
       }),
     ]);
diff --git a/packages/agent-core/test/agent/resume.test.ts b/packages/agent-core/test/agent/resume.test.ts
index 301e2533a..7430d5763 100644
--- a/packages/agent-core/test/agent/resume.test.ts
+++ b/packages/agent-core/test/agent/resume.test.ts
@@ -79,7 +79,8 @@ describe('Agent resume', () => {
         system: <system-prompt>
         tools: Bash
         messages:
-          assistant: text "Historical compacted summary."
+          user: text "Historical prompt"
+          user: text "Historical compacted summary."
           user: text "Fresh prompt after resume"
           user: text <plan-mode-reminder>
     `);
@@ -355,7 +356,11 @@ describe('Agent resume', () => {
 
     expect(ctx.agent.context.history).toEqual([
       expect.objectContaining({
-        role: 'assistant',
+        role: 'user',
+        content: [{ type: 'text', text: 'Historical prompt before compaction' }],
+      }),
+      expect.objectContaining({
+        role: 'user',
         content: [{ type: 'text', text: 'Compacted implementation notes.' }],
         origin: { kind: 'compaction_summary' },
       }),
@@ -375,6 +380,7 @@ describe('Agent resume', () => {
           compactedCount: 1,
           tokensBefore: 120,
           tokensAfter: 24,
+          keptUserMessageCount: 1,
         },
         instruction: 'preserve implementation notes',
       }),
diff --git a/packages/agent-core/test/prompt-placeholders.test.ts b/packages/agent-core/test/prompt-placeholders.test.ts
index a98e977e3..9566415c7 100644
--- a/packages/agent-core/test/prompt-placeholders.test.ts
+++ b/packages/agent-core/test/prompt-placeholders.test.ts
@@ -22,8 +22,8 @@ const SRC = join(import.meta.dirname, '..', 'src');
 // `.md` files rendered through `renderPrompt`. Keep in sync when a new
 // templated prompt file is introduced.
 const TEMPLATED = new Set([
-  'profile/default/system.md',
   'agent/compaction/compaction-instruction.md',
+  'profile/default/system.md',
   'tools/builtin/file/read.md',
   'tools/builtin/file/read-media.md',
   'tools/builtin/shell/bash.md',
diff --git a/packages/agent-core/test/services/message-transcript.test.ts b/packages/agent-core/test/services/message-transcript.test.ts
index 4ec462f2d..e848656db 100644
--- a/packages/agent-core/test/services/message-transcript.test.ts
+++ b/packages/agent-core/test/services/message-transcript.test.ts
@@ -92,26 +92,63 @@ describe('reduceWireRecords', () => {
     expect(foldedLength).toBe(2);
   });
 
-  it('compaction keeps the prefix and inserts the summary at the fold point', () => {
+  it('compaction keeps the prefix and appends the user-role summary', () => {
     const { entries, foldedLength } = reduceWireRecords([
       appendMessage(userMessage('u1')),
       ...assistantStep('s1', 'a1'),
       appendMessage(userMessage('u2')),
       ...assistantStep('s2', 'a2'),
-      // folded history is [u1, a1, u2, a2]; compact the first 3, keep a2.
-      compaction('SUM', 3),
+      compaction('SUM', 4),
       appendMessage(userMessage('u3')),
     ]);
     expect(entries.map((e) => textOf(e.message))).toEqual([
       'u1',
       'a1',
       'u2',
-      'SUM',
       'a2',
+      'SUM',
       'u3',
     ]);
-    expect(entries[3]!.message.origin).toEqual({ kind: 'compaction_summary' });
-    // live folded view would be [SUM, a2, u3]
+    expect(entries[4]!.message.origin).toEqual({ kind: 'compaction_summary' });
+    expect(entries[4]!.message.role).toBe('user');
+    // live folded view would be [u1, u2, SUM, u3]
+    expect(foldedLength).toBe(4);
+  });
+
+  it('keeps shell and local-command output in the transcript but not foldedLength', () => {
+    const { entries, foldedLength } = reduceWireRecords([
+      appendMessage(userMessage('u1')),
+      appendMessage(userMessage('! pwd', { kind: 'shell_command', phase: 'input' })),
+      appendMessage(userMessage('local output', { kind: 'injection', variant: 'local-command-stdout' })),
+      ...assistantStep('s1', 'a1'),
+      {
+        type: 'context.apply_compaction',
+        summary: 'SUM',
+        compactedCount: 4,
+        tokensBefore: 100,
+        tokensAfter: 20,
+        keptUserMessageCount: 1,
+      } as AgentRecord,
+      appendMessage(userMessage('u2')),
+    ]);
+
+    expect(entries.map((e) => textOf(e.message))).toEqual([
+      'u1',
+      '! pwd',
+      'local output',
+      'a1',
+      'SUM',
+      'u2',
+    ]);
+    expect(entries.map((e) => e.message.role)).toEqual([
+      'user',
+      'user',
+      'user',
+      'assistant',
+      'user',
+      'user',
+    ]);
+    // 1 kept real user message + summary + u2 appended after compaction.
     expect(foldedLength).toBe(3);
   });
 
@@ -120,11 +157,68 @@ describe('reduceWireRecords', () => {
       appendMessage(userMessage('u1')),
       compaction('S1', 1),
       appendMessage(userMessage('u2')),
-      // folded = [S1, u2]; compact both.
-      compaction('S2', 2),
+      compaction('S2', 3),
     ]);
     expect(entries.map((e) => textOf(e.message))).toEqual(['u1', 'S1', 'u2', 'S2']);
-    expect(foldedLength).toBe(1);
+    // live folded view would be [u1, u2, S2]
+    expect(foldedLength).toBe(3);
+  });
+
+  it('uses the recorded kept-user count for foldedLength when present', () => {
+    // The live context kept only the most recent real user message (e.g. the
+    // older ones were truncated in a prior compaction, or a clear dropped
+    // them). The full transcript still holds all three, so re-deriving from
+    // it would yield 3 and disagree with the live context. The reducer must
+    // trust the count recorded by ContextMemory.applyCompaction.
+    const { foldedLength } = reduceWireRecords([
+      appendMessage(userMessage('u1')),
+      appendMessage(userMessage('u2')),
+      appendMessage(userMessage('u3')),
+      {
+        type: 'context.apply_compaction',
+        summary: 'SUM',
+        compactedCount: 3,
+        tokensBefore: 100,
+        tokensAfter: 20,
+        keptUserMessageCount: 1,
+      } as AgentRecord,
+      appendMessage(userMessage('u4')),
+    ]);
+    // 1 kept user message + summary + u4 appended after compaction.
+    expect(foldedLength).toBe(3);
+  });
+
+  it('drops a late tool result after compaction closes an open exchange', () => {
+    const { entries, foldedLength } = reduceWireRecords([
+      appendMessage(userMessage('u1')),
+      loopEvent({ type: 'step.begin', uuid: 's1', turnId: 't', step: 0 }),
+      loopEvent({
+        type: 'tool.call',
+        uuid: 'c1',
+        turnId: 't',
+        step: 0,
+        stepUuid: 's1',
+        toolCallId: 'call_1',
+        name: 'Bash',
+        arguments: '{"command":"ls"}',
+      }),
+      compaction('SUM', 3),
+      loopEvent({
+        type: 'tool.result',
+        parentUuid: 'c1',
+        toolCallId: 'call_1',
+        result: { output: 'late result' },
+      }),
+      appendMessage(userMessage('u2')),
+    ]);
+
+    // Compaction closes the open exchange, so the late tool result is an
+    // orphan and is dropped — matching ContextMemory — and the following user
+    // message is appended normally instead of being stranded in `deferred`.
+    expect(entries.map((e) => e.message.role)).toEqual(['user', 'assistant', 'user', 'user']);
+    expect(entries.map((e) => textOf(e.message))).toEqual(['u1', '', 'SUM', 'u2']);
+    // live folded view would be [u1, SUM, u2]
+    expect(foldedLength).toBe(3);
   });
 
   it('undo removes through the last real user prompt and skips injections', () => {
@@ -433,19 +527,16 @@ describe('MessageService over a compacted wire log', () => {
       records.map((r) => JSON.stringify(r)).join('\n') + '\n',
       'utf8',
     );
-    // What getContext would return after the fold.
+    // What getContext would return after the fold: kept user messages + summary.
     liveHistory = [
+      userMessage('u1'),
+      userMessage('u2'),
       {
-        role: 'assistant',
+        role: 'user',
         content: [{ type: 'text', text: 'SUM' }],
         toolCalls: [],
         origin: { kind: 'compaction_summary' },
       } as ContextMessage,
-      {
-        role: 'assistant',
-        content: [{ type: 'text', text: 'a2' }],
-        toolCalls: [],
-      } as ContextMessage,
     ];
     const rpc: Partial<CoreRPC> = {
       listSessions: vi.fn().mockImplementation(async () => [summary()]),
@@ -473,8 +564,8 @@ describe('MessageService over a compacted wire log', () => {
     const asc = [...page.items].reverse();
     expect(
       asc.map((m) => (m.content[0] as { text?: string }).text ?? '[non-text]'),
-    ).toEqual(['u1', 'a1', 'u2', 'SUM', 'a2']);
-    expect(asc[3]!.metadata).toEqual({ origin: { kind: 'compaction_summary' } });
+    ).toEqual(['u1', 'a1', 'u2', 'a2', 'SUM']);
+    expect(asc[4]!.metadata).toEqual({ origin: { kind: 'compaction_summary' } });
   });
 
   it('uses wire record times for created_at, strictly increasing', async () => {
@@ -495,7 +586,7 @@ describe('MessageService over a compacted wire log', () => {
     const asc = [...page.items].reverse();
     expect(
       asc.map((m) => (m.content[0] as { text?: string }).text ?? '[non-text]'),
-    ).toEqual(['u1', 'a1', 'u2', 'SUM', 'a2', 'u3-live']);
+    ).toEqual(['u1', 'a1', 'u2', 'a2', 'SUM', 'u3-live']);
   });
 
   it('get() resolves ids against the same full transcript', async () => {
@@ -511,8 +602,9 @@ describe('MessageService over a compacted wire log', () => {
     const page = await impl.list(SESSION_ID, { page_size: 100 });
     const asc = [...page.items].reverse();
     expect(asc.map((m) => (m.content[0] as { text?: string }).text)).toEqual([
+      'u1',
+      'u2',
       'SUM',
-      'a2',
     ]);
   });
 
@@ -530,6 +622,6 @@ describe('MessageService over a compacted wire log', () => {
     const asc = [...page.items].reverse();
     expect(
       asc.map((m) => (m.content[0] as { text?: string }).text ?? '[non-text]'),
-    ).toEqual(['u1', 'a1', 'u2', 'SUM', 'a2', 'u3']);
+    ).toEqual(['u1', 'a1', 'u2', 'a2', 'SUM', 'u3']);
   });
 });
diff --git a/packages/agent-core/test/session/init.test.ts b/packages/agent-core/test/session/init.test.ts
index 89657684a..ec3148787 100644
--- a/packages/agent-core/test/session/init.test.ts
+++ b/packages/agent-core/test/session/init.test.ts
@@ -166,6 +166,53 @@ describe('Session.init', () => {
     }
   });
 
+  it('refreshes AGENTS.md from a resumed native session system prompt', async () => {
+    const workDir = await makeTempDir();
+    const sessionDir = await makeTempDir();
+    await mkdir(join(workDir, '.git'));
+    await writeFile(join(workDir, 'AGENTS.md'), 'initial resume instructions', 'utf-8');
+
+    const firstSession = new Session({
+      id: 'test-resume-system-prompt-refresh',
+      kaos: testKaos.withCwd(workDir),
+      persistenceKaos: testKaos.withCwd(workDir),
+      homedir: sessionDir,
+      rpc: createSessionRpc([]),
+      skills: { explicitDirs: [join(workDir, 'missing-skills')] },
+      providerManager: testProviderManager(),
+    });
+    try {
+      const agent = await firstSession.createMain();
+      expect(agent.config.systemPrompt).toContain('initial resume instructions');
+    } finally {
+      await firstSession.closeForReload();
+    }
+
+    await writeFile(join(workDir, 'AGENTS.md'), 'updated resume instructions', 'utf-8');
+
+    const resumedSession = new Session({
+      id: 'test-resume-system-prompt-refresh',
+      kaos: testKaos.withCwd(workDir),
+      persistenceKaos: testKaos.withCwd(workDir),
+      homedir: sessionDir,
+      rpc: createSessionRpc([]),
+      skills: { explicitDirs: [join(workDir, 'missing-skills')] },
+      providerManager: testProviderManager(),
+    });
+    try {
+      await resumedSession.resume();
+      const resumedAgent = await resumedSession.ensureAgentResumed('main');
+      expect(resumedAgent.config.systemPrompt).toContain('initial resume instructions');
+
+      await resumedAgent.refreshSystemPrompt();
+
+      expect(resumedAgent.config.systemPrompt).toContain('updated resume instructions');
+      expect(resumedAgent.config.systemPrompt).not.toContain('initial resume instructions');
+    } finally {
+      await resumedSession.close();
+    }
+  });
+
   it('rebuilds builtin tools when rebinding the session tool kaos', async () => {
     const workDir = await makeTempDir();
     const sessionDir = await makeTempDir();
diff --git a/packages/kosong/src/providers/anthropic.ts b/packages/kosong/src/providers/anthropic.ts
index 1b43abdda..a4e8bbe29 100644
--- a/packages/kosong/src/providers/anthropic.ts
+++ b/packages/kosong/src/providers/anthropic.ts
@@ -393,15 +393,10 @@ function injectCacheControlOnLastBlock(messages: MessageParam[]): void {
 }
 
 /**
- * Check whether a MessageParam is a user message whose content consists
- * entirely of `tool_result` blocks.
- *
- * Used to detect adjacent tool-result-only messages that must be merged
- * before hitting the Anthropic wire. Per the Messages API parallel-tool-use
- * spec, all `tool_result` blocks answering parallel `tool_use` calls must
- * live in a single user message — splitting them across consecutive user
- * messages fails on strict Anthropic-compatible backends (HTTP 400) and
- * silently degrades parallel tool use on api.anthropic.com.
+ * Whether a user MessageParam consists solely of `tool_result` blocks. Used to
+ * keep tool results bundled with each other (parallel-tool-use spec) while
+ * not merging a tool-result user message into an adjacent plain-text user
+ * message — the two carry different semantics and must stay separate.
  */
 function isToolResultOnly(message: MessageParam): boolean {
   if (message.role !== 'user') return false;
@@ -1000,8 +995,19 @@ export class AnthropicChatProvider implements ChatProvider {
         ]
       : undefined;
 
-    // Convert messages, merging consecutive tool-result-only user messages
-    // into a single user message (Anthropic parallel-tool-use spec).
+    // Convert messages, merging consecutive user messages of the same kind into
+    // one. Strict Anthropic-compatible backends reject consecutive user messages
+    // with HTTP 400 ("roles must alternate"), and api.anthropic.com concatenates
+    // them anyway — so merging is safe for native Anthropic and required for
+    // strict backends. Plain-text user messages merge with plain-text user
+    // messages; tool-result-only user messages merge with tool-result-only ones
+    // (the parallel-tool-use spec requires all tool_result blocks answering
+    // parallel tool_use calls to live in a single user message). A plain-text
+    // user message is intentionally NOT merged into an adjacent tool-result one:
+    // the two carry different semantics and must stay separate. Consecutive
+    // plain-text user messages arise naturally after compaction (kept user
+    // prompts + user-role summary + injected reminders) and from back-to-back
+    // system messages converted to user role above.
     const messages: MessageParam[] = [];
     const normalizedHistory = normalizeToolCallIdsForProvider(
       history,
@@ -1010,7 +1016,12 @@ export class AnthropicChatProvider implements ChatProvider {
     for (const msg of normalizedHistory) {
       const converted = convertMessage(msg, this._model);
       const last = messages.at(-1);
-      if (last !== undefined && isToolResultOnly(last) && isToolResultOnly(converted)) {
+      if (
+        last !== undefined &&
+        last.role === 'user' &&
+        converted.role === 'user' &&
+        isToolResultOnly(last) === isToolResultOnly(converted)
+      ) {
         last.content = [
           ...(last.content as ContentBlockParam[]),
           ...(converted.content as ContentBlockParam[]),
diff --git a/packages/kosong/test/anthropic.test.ts b/packages/kosong/test/anthropic.test.ts
index 3bd9fc70a..e3a64ac1f 100644
--- a/packages/kosong/test/anthropic.test.ts
+++ b/packages/kosong/test/anthropic.test.ts
@@ -1024,6 +1024,28 @@ describe('AnthropicChatProvider', () => {
       expect(msgs[3]!.content[0]!.text).toBe('Now summarize');
     });
 
+    it('merges consecutive plain-text user messages into one', async () => {
+      const provider = createProvider();
+      const history: Message[] = [
+        { role: 'user', content: [{ type: 'text', text: 'First' }], toolCalls: [] },
+        { role: 'user', content: [{ type: 'text', text: 'Second' }], toolCalls: [] },
+        { role: 'user', content: [{ type: 'text', text: 'Third' }], toolCalls: [] },
+      ];
+      const body = await captureRequestBody(provider, '', [], history);
+
+      const msgs = body['messages'] as Array<{
+        role: string;
+        content: Array<{ type: string; text?: string }>;
+      }>;
+
+      // Strict Anthropic-compatible backends reject consecutive user messages,
+      // so back-to-back plain-text user turns (e.g. the post-compaction shape
+      // of kept prompts + user-role summary + reminders) must be collapsed.
+      expect(msgs).toHaveLength(1);
+      expect(msgs[0]!.role).toBe('user');
+      expect(msgs[0]!.content.map((block) => block.text)).toEqual(['First', 'Second', 'Third']);
+    });
+
     it('assistant with thinking (has encrypted -> ThinkingBlockParam)', async () => {
       const provider = createProvider();
       const history: Message[] = [
diff --git a/packages/protocol/src/events.ts b/packages/protocol/src/events.ts
index d0e22e0d6..937ef5510 100644
--- a/packages/protocol/src/events.ts
+++ b/packages/protocol/src/events.ts
@@ -284,6 +284,22 @@ export interface CompactionResult {
   readonly compactedCount: number;
   readonly tokensBefore: number;
   readonly tokensAfter: number;
+  /**
+   * Number of real user messages kept verbatim ahead of the summary in the
+   * post-compaction live context. Recorded so the wire-transcript reducer can
+   * reproduce the live folded length without re-deriving it from the full
+   * transcript (which still holds the untruncated originals of messages the
+   * live context may have truncated, so the two would otherwise diverge).
+   * Optional for backward compatibility with older wire records.
+   */
+  readonly keptUserMessageCount?: number;
+  /**
+   * Number of oldest messages trimmed from the summarizer input when the
+   * compaction request overflowed the model window; those messages are not
+   * covered by the produced summary. Mirrors agent-core's
+   * `CompactionResult.droppedCount`; optional for backward compatibility.
+   */
+  readonly droppedCount?: number;
 }
 
 export interface ToolUpdate {
@@ -944,6 +960,8 @@ export const compactionResultSchema = z.object({
   compactedCount: z.number(),
   tokensBefore: z.number(),
   tokensAfter: z.number(),
+  keptUserMessageCount: z.number().optional(),
+  droppedCount: z.number().optional(),
 }) satisfies z.ZodType<CompactionResult>;
 
 export const toolUpdateSchema = z.object({
diff --git a/packages/server/test/sessions.e2e.test.ts b/packages/server/test/sessions.e2e.test.ts
index 12ae9b761..46d662d79 100644
--- a/packages/server/test/sessions.e2e.test.ts
+++ b/packages/server/test/sessions.e2e.test.ts
@@ -580,7 +580,7 @@ describe('POST /api/v1/sessions/{session_id}:compact — begin compaction', () =
     const env = envelopeOf<unknown>(res.json());
     expect(env.code).toBe(ErrorCode.COMPACTION_UNABLE);
     expect(env.data).toBeNull();
-    expect(env.msg).toMatch(/No prefix/);
+    expect(env.msg).toMatch(/No messages to compact/);
   });
 });
 
diff --git a/packages/server/test/snapshotService.unit.test.ts b/packages/server/test/snapshotService.unit.test.ts
index 1c328e990..be50822eb 100644
--- a/packages/server/test/snapshotService.unit.test.ts
+++ b/packages/server/test/snapshotService.unit.test.ts
@@ -235,11 +235,11 @@ describe('SnapshotService.read', () => {
     ]);
 
     const snap = await f.service.read(sid);
-    // Reduce keeps the prefix and inserts the summary at the fold; final
-    // entry list is older-1, older-2, <summary>, after-compaction.
+    // Reduce keeps the prefix and appends a user-role summary; final entry
+    // list is older-1, older-2, <summary>, after-compaction.
     expect(snap.messages.items).toHaveLength(4);
     const summaryMsg = snap.messages.items[2]!;
-    expect(summaryMsg.role).toBe('assistant');
+    expect(summaryMsg.role).toBe('user');
     expect((summaryMsg.content[0] as { text: string }).text).toBe('compacted prefix');
     expect(snap.messages.items[3]!.role).toBe('user');
   });