fix: bound short-429 retry loop to MAX_SHORT_RETRY_ATTEMPTS

ndycode · ndycode · commit 2b4a80aa7d99 · 2026-04-06T11:31:28.000+08:00
When an upstream perpetually returns short Retry-After values (<= 5s), the short-retry path would loop indefinitely on the same account. Add MAX_SHORT_RETRY_ATTEMPTS (3) so that after 3 consecutive short-cooldown 429s the request falls through to the existing long-cooldown rotation path, which marks the account rate-limited and rotates to the next one.

Changes:

- lib/request/rate-limit-backoff.ts: add MAX_SHORT_RETRY_ATTEMPTS = 3 constant

- index.ts: import MAX_SHORT_RETRY_ATTEMPTS and add attempt < MAX_SHORT_RETRY_ATTEMPTS guard to short-retry condition

- test/index.test.ts: add 2 test cases covering the bound (at-limit rotates, below-limit retries)
diff --git a/index.ts b/index.ts
@@ -167,6 +167,7 @@ import {
 } from "./lib/request/fetch-helpers.js";
 import {
 	getRateLimitBackoff,
+	MAX_SHORT_RETRY_ATTEMPTS,
 	RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS,
 	resetRateLimitBackoff,
 } from "./lib/request/rate-limit-backoff.js";
@@ -1901,26 +1902,29 @@ let sessionAffinityWriteVersion = 0;
 													);
 													const waitLabel = formatWaitTime(cooldownMs);
 
-													if (cooldownMs <= RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS) {
-														if (
-															accountManager.shouldShowAccountToast(
-																account.index,
-																rateLimitToastDebounceMs,
-															)
-														) {
-															await showRuntimeToast(client, 
-																`Rate limited. Retrying in ${waitLabel} (attempt ${attempt})...`,
-																"warning",
-																{ duration: toastDurationMs },
-															);
-															accountManager.markToastShown(account.index);
-														}
+								if (
+									cooldownMs <= RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS &&
+									attempt < MAX_SHORT_RETRY_ATTEMPTS
+								) {
+									if (
+										accountManager.shouldShowAccountToast(
+											account.index,
+											rateLimitToastDebounceMs,
+										)
+									) {
+										await showRuntimeToast(client, 
+											`Rate limited. Retrying in ${waitLabel} (attempt ${attempt})...`,
+											"warning",
+											{ duration: toastDurationMs },
+										);
+										accountManager.markToastShown(account.index);
+									}
 
-														await sleep(
-															addJitter(Math.max(MIN_BACKOFF_MS, cooldownMs), 0.2),
-														);
-														continue;
-													}
+									await sleep(
+										addJitter(Math.max(MIN_BACKOFF_MS, cooldownMs), 0.2),
+									);
+									continue;
+								}
 
 													accountManager.markRateLimitedWithReason(
 														account,
diff --git a/lib/request/rate-limit-backoff.ts b/lib/request/rate-limit-backoff.ts
@@ -20,6 +20,16 @@ const MAX_BACKOFF_MS = 60_000;
 
 export const RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS = 5000;
 
+/**
+ * Consecutive short-cooldown 429 attempt count at which the request stops
+ * retrying the same account and falls through to the long-cooldown rotation path.
+ *
+ * Without this bound, an upstream that perpetually returns short
+ * Retry-After values (≤ RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS) would
+ * keep the request loop spinning on the same account indefinitely.
+ */
+export const MAX_SHORT_RETRY_ATTEMPTS = 3;
+
 interface RateLimitState {
 	consecutive429: number;
 	lastAt: number;
diff --git a/test/index.test.ts b/test/index.test.ts
@@ -206,6 +206,7 @@ vi.mock("../lib/recovery.js", () => ({
 vi.mock("../lib/request/rate-limit-backoff.js", () => ({
 	getRateLimitBackoff: vi.fn(() => ({ attempt: 1, delayMs: 1000 })),
 	RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS: 5000,
+	MAX_SHORT_RETRY_ATTEMPTS: 3,
 	resetRateLimitBackoff: vi.fn(),
 }));
 
@@ -4780,6 +4781,195 @@ describe("OpenAIOAuthPlugin runtime toast forwarding", () => {
 		);
 	});
 
+	it("falls through to rotation when short-retry attempt count reaches MAX_SHORT_RETRY_ATTEMPTS", async () => {
+		const { AccountManager } = await import("../lib/accounts.js");
+		const fetchHelpersModule = await import("../lib/request/fetch-helpers.js");
+		const rateLimitBackoffModule = await import("../lib/request/rate-limit-backoff.js");
+
+		const markRateLimitedWithReason = vi.fn();
+		const recordRateLimit = vi.fn();
+		const manager = {
+			getAccountCount: () => 1,
+			getCurrentOrNextForFamilyHybrid: () => ({
+				index: 0,
+				accountId: "acc-1",
+				email: "alpha@example.com",
+				refreshToken: "refresh-1",
+			}),
+			getCurrentOrNextForFamily: () => ({
+				index: 0,
+				accountId: "acc-1",
+				email: "alpha@example.com",
+				refreshToken: "refresh-1",
+			}),
+			getCurrentWorkspace: () => null,
+			getAccountByIndex: () => null,
+			getAccountsSnapshot: () => [],
+			isAccountAvailableForFamily: () => true,
+			toAuthDetails: () => ({
+				type: "oauth" as const,
+				access: "access-token",
+				refresh: "refresh-1",
+				expires: Date.now() + 60_000,
+			}),
+			hasRefreshToken: () => true,
+			saveToDiskDebounced: () => {},
+			updateFromAuth: () => {},
+			clearAuthFailures: () => {},
+			incrementAuthFailures: () => 1,
+			saveToDisk: async () => {},
+			markAccountCoolingDown: () => {},
+			markRateLimited: () => {},
+			markRateLimitedWithReason,
+			consumeToken: () => true,
+			refundToken: () => {},
+			syncCodexCliActiveSelectionForIndex: async () => {},
+			markSwitched: () => {},
+			removeAccount: () => {},
+			recordFailure: () => {},
+			recordSuccess: () => {},
+			recordRateLimit,
+			getMinWaitTimeForFamily: () => 0,
+			shouldShowAccountToast: () => true,
+			markToastShown: () => {},
+			setActiveIndex: () => null,
+		};
+		vi.spyOn(AccountManager, "loadFromDisk").mockResolvedValue(manager as never);
+
+		// Return a short cooldown (1s) but attempt=3, which is >= MAX_SHORT_RETRY_ATTEMPTS (3).
+		// This should fall through to rotation instead of short-retrying.
+		vi.mocked(fetchHelpersModule.handleErrorResponse).mockResolvedValueOnce({
+			response: new Response("rate limited", { status: 429 }),
+			rateLimit: {
+				retryAfterMs: 1000,
+				code: "rate_limit_exceeded",
+			},
+			errorBody: "rate limited",
+		} as never);
+		vi.mocked(rateLimitBackoffModule.getRateLimitBackoff).mockReturnValueOnce({
+			attempt: 3,
+			delayMs: 1000,
+		});
+		globalThis.fetch = vi
+			.fn()
+			.mockResolvedValueOnce(new Response("rate limited", { status: 429 }));
+
+		const mockClient = createMockClient();
+		const { OpenAIOAuthPlugin } = await import("../index.js");
+		const plugin = await OpenAIOAuthPlugin({ client: mockClient } as never) as unknown as PluginType;
+		const sdk = await plugin.auth.loader(getOAuthAuth, { options: {}, models: {} });
+		const response = await sdk.fetch!("https://api.openai.com/v1/chat/completions", {
+			method: "POST",
+			body: JSON.stringify({ model: "gpt-5.1" }),
+		});
+
+		// Should have rotated (503 returned as single-account pool exhausted)
+		// rather than short-retrying
+		expect(response.status).toBe(503);
+		expect(globalThis.fetch).toHaveBeenCalledTimes(1);
+		expect(markRateLimitedWithReason).toHaveBeenCalledWith(
+			expect.objectContaining({ index: 0 }),
+			1000,
+			"gpt-5.1",
+			expect.any(String),
+			"gpt-5.1",
+		);
+		expect(recordRateLimit).toHaveBeenCalled();
+	});
+
+	it("short-retries the same account when attempt is below MAX_SHORT_RETRY_ATTEMPTS", async () => {
+		const { AccountManager } = await import("../lib/accounts.js");
+		const fetchHelpersModule = await import("../lib/request/fetch-helpers.js");
+		const rateLimitBackoffModule = await import("../lib/request/rate-limit-backoff.js");
+
+		const markRateLimitedWithReason = vi.fn();
+		const manager = {
+			getAccountCount: () => 1,
+			getCurrentOrNextForFamilyHybrid: () => ({
+				index: 0,
+				accountId: "acc-1",
+				email: "alpha@example.com",
+				refreshToken: "refresh-1",
+			}),
+			getCurrentOrNextForFamily: () => ({
+				index: 0,
+				accountId: "acc-1",
+				email: "alpha@example.com",
+				refreshToken: "refresh-1",
+			}),
+			getCurrentWorkspace: () => null,
+			getAccountByIndex: () => null,
+			getAccountsSnapshot: () => [],
+			isAccountAvailableForFamily: () => true,
+			toAuthDetails: () => ({
+				type: "oauth" as const,
+				access: "access-token",
+				refresh: "refresh-1",
+				expires: Date.now() + 60_000,
+			}),
+			hasRefreshToken: () => true,
+			saveToDiskDebounced: () => {},
+			updateFromAuth: () => {},
+			clearAuthFailures: () => {},
+			incrementAuthFailures: () => 1,
+			saveToDisk: async () => {},
+			markAccountCoolingDown: () => {},
+			markRateLimited: () => {},
+			markRateLimitedWithReason,
+			consumeToken: () => true,
+			refundToken: () => {},
+			syncCodexCliActiveSelectionForIndex: async () => {},
+			markSwitched: () => {},
+			removeAccount: () => {},
+			recordFailure: () => {},
+			recordSuccess: () => {},
+			recordRateLimit: () => {},
+			getMinWaitTimeForFamily: () => 0,
+			shouldShowAccountToast: () => true,
+			markToastShown: () => {},
+			setActiveIndex: () => null,
+		};
+		vi.spyOn(AccountManager, "loadFromDisk").mockResolvedValue(manager as never);
+
+		// First request: 429 with attempt=2 (below MAX_SHORT_RETRY_ATTEMPTS=3) → short retry
+		// Second request: 200 OK
+		vi.mocked(fetchHelpersModule.handleErrorResponse).mockResolvedValueOnce({
+			response: new Response("rate limited", { status: 429 }),
+			rateLimit: {
+				retryAfterMs: 1000,
+				code: "rate_limit_exceeded",
+			},
+			errorBody: "rate limited",
+		} as never);
+		vi.mocked(rateLimitBackoffModule.getRateLimitBackoff).mockReturnValueOnce({
+			attempt: 2,
+			delayMs: 1000,
+		});
+		globalThis.fetch = vi
+			.fn()
+			.mockResolvedValueOnce(new Response("rate limited", { status: 429 }))
+			.mockResolvedValueOnce(new Response(JSON.stringify({ content: "ok" }), { status: 200 }));
+
+		const mockClient = createMockClient();
+		const { OpenAIOAuthPlugin } = await import("../index.js");
+		const plugin = await OpenAIOAuthPlugin({ client: mockClient } as never) as unknown as PluginType;
+		const sdk = await plugin.auth.loader(getOAuthAuth, { options: {}, models: {} });
+
+		vi.useFakeTimers();
+		const responsePromise = sdk.fetch!("https://api.openai.com/v1/chat/completions", {
+			method: "POST",
+			body: JSON.stringify({ model: "gpt-5.1" }),
+		});
+		await vi.advanceTimersByTimeAsync(2000);
+		const response = await responsePromise;
+
+		// Should have short-retried and succeeded
+		expect(response.status).toBe(200);
+		expect(globalThis.fetch).toHaveBeenCalledTimes(2);
+		// markRateLimitedWithReason should NOT have been called (no rotation)
+		expect(markRateLimitedWithReason).not.toHaveBeenCalled();
+	});
+
 	it("persists the longer parsed rate-limit cooldown across overlapping requests", async () => {
 		const { AccountManager } = await import("../lib/accounts.js");
 		const { AccountManager: ActualAccountManager } =