fix: check remapped errorResponse status for 404->429 usage_limit rotation

ndycode · ndycode · commit b2aa55f63454 · 2026-04-06T11:31:17.000+08:00
When OpenAI returns a 404 with a usage_limit_reached body, fetch-helpers.ts
remaps it to a 429 response. But the main pipeline was checking the
original response.status (404) instead of errorResponse.status (429),
so the rate-limit handling branch never ran. This caused exhausted
accounts to appear healthy and sessions to be killed instead of
rotating to available accounts.

Change index.ts:1869 from:
  if (response.status === 429)
to:
  if (errorResponse.status === 429 && rateLimit)

The added rateLimit guard also prevents entitlement-style 429s (where
rateLimit is explicitly set to undefined) from incorrectly marking
accounts as rate-limited.

Tests: 2 new tests covering both scenarios, full suite passes (3314 tests).
diff --git a/index.ts b/index.ts
@@ -1866,7 +1866,7 @@ let sessionAffinityWriteVersion = 0;
 													break;
 												}
 
-												if (response.status === 429) {
+												if (errorResponse.status === 429 && rateLimit) {
 													runtimeMetrics.rateLimitedResponses++;
 													const retryAfterMs =
 														rateLimit?.retryAfterMs ?? 60_000;
diff --git a/test/index.test.ts b/test/index.test.ts
@@ -4260,10 +4260,9 @@ describe("OpenAIOAuthPlugin runtime toast forwarding", () => {
 		expect(showRuntimeToastMock).not.toHaveBeenCalled();
 	});
 
-	it("applies the default cooldown when a 429 has no parsed retry metadata", async () => {
+	it("skips rate-limit marking when a 429 has no parsed retry metadata (entitlement-style)", async () => {
 		const { AccountManager } = await import("../lib/accounts.js");
 		const fetchHelpersModule = await import("../lib/request/fetch-helpers.js");
-		const rateLimitBackoffModule = await import("../lib/request/rate-limit-backoff.js");
 
 		const markRateLimitedWithReason = vi.fn();
 		const manager = {
@@ -4318,13 +4317,95 @@ describe("OpenAIOAuthPlugin runtime toast forwarding", () => {
 			rateLimit: undefined,
 			errorBody: "rate limited",
 		} as never);
+		globalThis.fetch = vi
+			.fn()
+			.mockResolvedValueOnce(new Response("rate limited", { status: 429 }));
+
+		const mockClient = createMockClient();
+		const { OpenAIOAuthPlugin } = await import("../index.js");
+		const plugin = await OpenAIOAuthPlugin({ client: mockClient } as never) as unknown as PluginType;
+		const sdk = await plugin.auth.loader(getOAuthAuth, { options: {}, models: {} });
+		const response = await sdk.fetch!("https://api.openai.com/v1/chat/completions", {
+			method: "POST",
+			body: JSON.stringify({ model: "gpt-5.1" }),
+		});
+
+		expect(response.status).toBe(429);
+		expect(globalThis.fetch).toHaveBeenCalledTimes(1);
+		expect(markRateLimitedWithReason).not.toHaveBeenCalled();
+	});
+
+	it("rotates account when upstream 404 usage_limit_reached is remapped to 429", async () => {
+		const { AccountManager } = await import("../lib/accounts.js");
+		const fetchHelpersModule = await import("../lib/request/fetch-helpers.js");
+		const rateLimitBackoffModule = await import("../lib/request/rate-limit-backoff.js");
+
+		const markRateLimitedWithReason = vi.fn();
+		const manager = {
+			getAccountCount: () => 1,
+			getCurrentOrNextForFamilyHybrid: () => ({
+				index: 0,
+				accountId: "acc-1",
+				email: "alpha@example.com",
+				refreshToken: "refresh-1",
+			}),
+			getCurrentOrNextForFamily: () => ({
+				index: 0,
+				accountId: "acc-1",
+				email: "alpha@example.com",
+				refreshToken: "refresh-1",
+			}),
+			getCurrentWorkspace: () => null,
+			getAccountByIndex: () => null,
+			getAccountsSnapshot: () => [],
+			isAccountAvailableForFamily: () => true,
+			toAuthDetails: () => ({
+				type: "oauth" as const,
+				access: "access-token",
+				refresh: "refresh-1",
+				expires: Date.now() + 60_000,
+			}),
+			hasRefreshToken: () => true,
+			saveToDiskDebounced: () => {},
+			updateFromAuth: () => {},
+			clearAuthFailures: () => {},
+			incrementAuthFailures: () => 1,
+			saveToDisk: async () => {},
+			markAccountCoolingDown: () => {},
+			markRateLimited: () => {},
+			markRateLimitedWithReason,
+			consumeToken: () => true,
+			refundToken: () => {},
+			syncCodexCliActiveSelectionForIndex: async () => {},
+			markSwitched: () => {},
+			removeAccount: () => {},
+			recordFailure: () => {},
+			recordSuccess: () => {},
+			recordRateLimit: () => {},
+			getMinWaitTimeForFamily: () => 0,
+			shouldShowAccountToast: () => true,
+			markToastShown: () => {},
+			setActiveIndex: () => null,
+		};
+		vi.spyOn(AccountManager, "loadFromDisk").mockResolvedValue(manager as never);
+		vi.mocked(fetchHelpersModule.handleErrorResponse).mockResolvedValueOnce({
+			response: new Response(JSON.stringify({ error: { code: "usage_limit_reached", message: "Usage limit reached" } }), { status: 429 }),
+			rateLimit: {
+				retryAfterMs: 2 * 60 * 60 * 1000,
+				code: "usage_limit_reached",
+			},
+			errorBody: JSON.stringify({ error: { code: "usage_limit_reached", message: "Usage limit reached" } }),
+		} as never);
 		vi.mocked(rateLimitBackoffModule.getRateLimitBackoff).mockReturnValueOnce({
-			attempt: 1,
-			delayMs: 5_000,
+			attempt: 2,
+			delayMs: 1000,
 		});
 		globalThis.fetch = vi
 			.fn()
-			.mockResolvedValueOnce(new Response("rate limited", { status: 429 }));
+			.mockResolvedValueOnce(
+				new Response(JSON.stringify({ error: { code: "usage_limit_reached", message: "Usage limit reached" } }), { status: 404 }),
+			)
+			.mockResolvedValueOnce(new Response(JSON.stringify({ content: "ok" }), { status: 200 }));
 
 		const mockClient = createMockClient();
 		const { OpenAIOAuthPlugin } = await import("../index.js");
@@ -4336,11 +4417,12 @@ describe("OpenAIOAuthPlugin runtime toast forwarding", () => {
 		});
 
 		expect(response.status).toBe(503);
+		expect(globalThis.fetch).toHaveBeenCalledTimes(1);
 		expect(markRateLimitedWithReason).toHaveBeenCalledWith(
 			expect.objectContaining({ index: 0 }),
-			60_000,
+			2 * 60 * 60 * 1000,
 			"gpt-5.1",
-			"unknown",
+			expect.any(String),
 			"gpt-5.1",
 		);
 	});

Original file line number	Diff line number	Diff line change
`@@ -1866,7 +1866,7 @@ let sessionAffinityWriteVersion = 0;`
`1866`	`1866`	`break;`
`1867`	`1867`	`}`
`1868`	`1868`
`1869`		`- if (response.status === 429) {`
	`1869`	`+ if (errorResponse.status === 429 && rateLimit) {`
`1870`	`1870`	`runtimeMetrics.rateLimitedResponses++;`
`1871`	`1871`	`const retryAfterMs =`
`1872`	`1872`	`rateLimit?.retryAfterMs ?? 60_000;`