From bde789a89e3a6eee90ee328b5e564e1ea70ec2b5 Mon Sep 17 00:00:00 2001 From: Robin1987China <41602358+Robin1987China@users.noreply.github.com> Date: Mon, 22 Jun 2026 17:13:53 +0800 Subject: [PATCH] fix(core): stop duplicating the boundary message in compaction head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit select() partitions the conversation into head (summarized) and recent (kept) at a token budget. When the boundary lands mid-message, that message is split into splitPrefix (head) and splitSuffix (recent). The split branch previously set split = index + 1 and reused it for both head's slice end and recent's slice start, so head = slice(0, index + 1) included the full boundary message AND splitPrefix (a truncated copy) — duplicating it. head is the exact text fed to the summarizer, so on every overflow-triggered compaction where the boundary lands mid-message, the boundary message was sent twice (full + truncated). This wasted tokens and could push the summary prompt past the context limit, causing compaction to fail silently. Track two boundaries: headEnd (end of head's full-message slice) and recentStart (start of recent's slice). They differ only in the split case (headEnd = index, recentStart = index + 1); all other paths keep them equal, preserving existing behavior. Export select and add a regression test for the duplication plus a fully-fitting sanity case. 
Closes #33329 --- packages/core/src/session/compaction.ts | 23 +++++++--- packages/core/test/session-compaction.test.ts | 43 +++++++++++++++++++ 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/packages/core/src/session/compaction.ts b/packages/core/src/session/compaction.ts index 5229949cb958..147af89b6bc4 100644 --- a/packages/core/src/session/compaction.ts +++ b/packages/core/src/session/compaction.ts @@ -130,7 +130,7 @@ const settings = (documents: readonly Config.Entry[]) => { ) } -const select = ( +export const select = ( entries: readonly Entry[], tokens: number, ): { readonly head: string; readonly recent: string } | undefined => { @@ -140,7 +140,12 @@ const select = ( .filter(Boolean) if (conversation.length === 0) return let total = 0 - let split = conversation.length + // head ends at headEnd (exclusive); recent starts at recentStart. They differ + // only when the budget boundary lands mid-message: that message is split into + // splitPrefix (head) and splitSuffix (recent), so the full message must NOT + // also appear in head's slice — head ends at `index`, recent starts at `index + 1`. + let headEnd = conversation.length + let recentStart = conversation.length let splitPrefix = "" let splitSuffix = "" for (let index = conversation.length - 1; index >= 0; index--) { @@ -150,16 +155,22 @@ const select = ( if (remaining > 0) { splitPrefix = conversation[index].slice(0, -remaining) splitSuffix = conversation[index].slice(-remaining) - split = index + 1 + headEnd = index + recentStart = index + 1 + } else { + // No room even for a suffix: the boundary message goes entirely to head. 
+ headEnd = index + 1 + recentStart = index + 1 } break } total = next - split = index + headEnd = index + recentStart = index } return { - head: [...conversation.slice(0, split), splitPrefix].filter(Boolean).join("\n\n"), - recent: [splitSuffix, ...conversation.slice(split)].filter(Boolean).join("\n\n"), + head: [...conversation.slice(0, headEnd), splitPrefix].filter(Boolean).join("\n\n"), + recent: [splitSuffix, ...conversation.slice(recentStart)].filter(Boolean).join("\n\n"), } } diff --git a/packages/core/test/session-compaction.test.ts b/packages/core/test/session-compaction.test.ts index e91c89c00954..b0bfcaa85793 100644 --- a/packages/core/test/session-compaction.test.ts +++ b/packages/core/test/session-compaction.test.ts @@ -1,5 +1,7 @@ import { expect, test } from "bun:test" +import { DateTime } from "effect" import { SessionCompaction } from "@opencode-ai/core/session/compaction" +import { SessionMessage } from "@opencode-ai/core/session/message" test("compaction describes tool media without embedding base64", () => { const base64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAAB" @@ -16,3 +18,44 @@ test("compaction describes tool media without embedding base64", () => { expect(serialized).toBe("Image read successfully\n[Attached image/png: pixel.png]") expect(serialized).not.toContain(base64) }) + +const created = DateTime.makeUnsafe(0) +const userEntry = (seq: number, text: string) => ({ + seq, + message: new SessionMessage.User({ + id: SessionMessage.ID.make(`msg_${seq}`), + type: "user", + text, + time: { created }, + }), +}) + +test("select does not duplicate the boundary message into head", () => { + // serialize() renders a user message as `[User]: `; Token.estimate is + // round(length / 4). With tokens=5: the small message (estimate 3) fits, then + // the big message overflows and is split mid-message. The boundary message + // must appear in head ONLY as the truncated prefix — never in full. 
+ const big = "A".repeat(40) + const entries = [userEntry(0, big), userEntry(1, "BBBB")] + + const result = SessionCompaction.select(entries, 5) + + expect(result).toBeDefined() + // head is just the prefix slice, not the full boundary message + its prefix. + expect(result!.head).toBe(`[User]: ${"A".repeat(32)}`) + expect(result!.recent).toBe(`${"A".repeat(8)}\n\n[User]: BBBB`) + // Regression guard for the duplication bug (head once contained the full + // 40-char message AND its 32-char prefix copy): + expect(result!.head).not.toContain("A".repeat(40)) + expect(result!.head.split("[User]:").length - 1).toBe(1) +}) + +test("select keeps everything in recent when the whole conversation fits", () => { + const entries = [userEntry(0, "first"), userEntry(1, "second")] + + const result = SessionCompaction.select(entries, 1000) + + expect(result).toBeDefined() + expect(result!.head).toBe("") + expect(result!.recent).toBe("[User]: first\n\n[User]: second") +})