Skip to content

Commit 2b4a80a

Browse files
committed
fix: bound short-429 retry loop to MAX_SHORT_RETRY_ATTEMPTS
When an upstream perpetually returns short Retry-After values (<= 5s), the short-retry path would loop indefinitely on the same account. Add MAX_SHORT_RETRY_ATTEMPTS (3) so that after 3 consecutive short-cooldown 429s the request falls through to the existing long-cooldown rotation path, which marks the account rate-limited and rotates to the next one. Changes: - lib/request/rate-limit-backoff.ts: add MAX_SHORT_RETRY_ATTEMPTS = 3 constant - index.ts: import MAX_SHORT_RETRY_ATTEMPTS and add attempt < MAX_SHORT_RETRY_ATTEMPTS guard to short-retry condition - test/index.test.ts: add 2 test cases covering the bound (at-limit rotates, below-limit retries)
1 parent f44cf0f commit 2b4a80a

3 files changed

Lines changed: 223 additions & 19 deletions

File tree

index.ts

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ import {
167167
} from "./lib/request/fetch-helpers.js";
168168
import {
169169
getRateLimitBackoff,
170+
MAX_SHORT_RETRY_ATTEMPTS,
170171
RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS,
171172
resetRateLimitBackoff,
172173
} from "./lib/request/rate-limit-backoff.js";
@@ -1901,26 +1902,29 @@ let sessionAffinityWriteVersion = 0;
19011902
);
19021903
const waitLabel = formatWaitTime(cooldownMs);
19031904

1904-
if (cooldownMs <= RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS) {
1905-
if (
1906-
accountManager.shouldShowAccountToast(
1907-
account.index,
1908-
rateLimitToastDebounceMs,
1909-
)
1910-
) {
1911-
await showRuntimeToast(client,
1912-
`Rate limited. Retrying in ${waitLabel} (attempt ${attempt})...`,
1913-
"warning",
1914-
{ duration: toastDurationMs },
1915-
);
1916-
accountManager.markToastShown(account.index);
1917-
}
1905+
if (
1906+
cooldownMs <= RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS &&
1907+
attempt < MAX_SHORT_RETRY_ATTEMPTS
1908+
) {
1909+
if (
1910+
accountManager.shouldShowAccountToast(
1911+
account.index,
1912+
rateLimitToastDebounceMs,
1913+
)
1914+
) {
1915+
await showRuntimeToast(client,
1916+
`Rate limited. Retrying in ${waitLabel} (attempt ${attempt})...`,
1917+
"warning",
1918+
{ duration: toastDurationMs },
1919+
);
1920+
accountManager.markToastShown(account.index);
1921+
}
19181922

1919-
await sleep(
1920-
addJitter(Math.max(MIN_BACKOFF_MS, cooldownMs), 0.2),
1921-
);
1922-
continue;
1923-
}
1923+
await sleep(
1924+
addJitter(Math.max(MIN_BACKOFF_MS, cooldownMs), 0.2),
1925+
);
1926+
continue;
1927+
}
19241928

19251929
accountManager.markRateLimitedWithReason(
19261930
account,

lib/request/rate-limit-backoff.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,16 @@ const MAX_BACKOFF_MS = 60_000;
2020

2121
export const RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS = 5000;
2222

23+
/**
24+
* Maximum number of consecutive short-cooldown 429 retries before
25+
* falling through to the long-cooldown rotation path.
26+
*
27+
* Without this bound, an upstream that perpetually returns short
28+
* Retry-After values (≤ RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS) would
29+
* keep the request loop spinning on the same account indefinitely.
30+
*/
31+
export const MAX_SHORT_RETRY_ATTEMPTS = 3;
32+
2333
interface RateLimitState {
2434
consecutive429: number;
2535
lastAt: number;

test/index.test.ts

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ vi.mock("../lib/recovery.js", () => ({
206206
vi.mock("../lib/request/rate-limit-backoff.js", () => ({
207207
getRateLimitBackoff: vi.fn(() => ({ attempt: 1, delayMs: 1000 })),
208208
RATE_LIMIT_SHORT_RETRY_THRESHOLD_MS: 5000,
209+
MAX_SHORT_RETRY_ATTEMPTS: 3,
209210
resetRateLimitBackoff: vi.fn(),
210211
}));
211212

@@ -4780,6 +4781,195 @@ describe("OpenAIOAuthPlugin runtime toast forwarding", () => {
47804781
);
47814782
});
47824783

4784+
it("falls through to rotation when short-retry attempt count reaches MAX_SHORT_RETRY_ATTEMPTS", async () => {
4785+
const { AccountManager } = await import("../lib/accounts.js");
4786+
const fetchHelpersModule = await import("../lib/request/fetch-helpers.js");
4787+
const rateLimitBackoffModule = await import("../lib/request/rate-limit-backoff.js");
4788+
4789+
const markRateLimitedWithReason = vi.fn();
4790+
const recordRateLimit = vi.fn();
4791+
const manager = {
4792+
getAccountCount: () => 1,
4793+
getCurrentOrNextForFamilyHybrid: () => ({
4794+
index: 0,
4795+
accountId: "acc-1",
4796+
email: "alpha@example.com",
4797+
refreshToken: "refresh-1",
4798+
}),
4799+
getCurrentOrNextForFamily: () => ({
4800+
index: 0,
4801+
accountId: "acc-1",
4802+
email: "alpha@example.com",
4803+
refreshToken: "refresh-1",
4804+
}),
4805+
getCurrentWorkspace: () => null,
4806+
getAccountByIndex: () => null,
4807+
getAccountsSnapshot: () => [],
4808+
isAccountAvailableForFamily: () => true,
4809+
toAuthDetails: () => ({
4810+
type: "oauth" as const,
4811+
access: "access-token",
4812+
refresh: "refresh-1",
4813+
expires: Date.now() + 60_000,
4814+
}),
4815+
hasRefreshToken: () => true,
4816+
saveToDiskDebounced: () => {},
4817+
updateFromAuth: () => {},
4818+
clearAuthFailures: () => {},
4819+
incrementAuthFailures: () => 1,
4820+
saveToDisk: async () => {},
4821+
markAccountCoolingDown: () => {},
4822+
markRateLimited: () => {},
4823+
markRateLimitedWithReason,
4824+
consumeToken: () => true,
4825+
refundToken: () => {},
4826+
syncCodexCliActiveSelectionForIndex: async () => {},
4827+
markSwitched: () => {},
4828+
removeAccount: () => {},
4829+
recordFailure: () => {},
4830+
recordSuccess: () => {},
4831+
recordRateLimit,
4832+
getMinWaitTimeForFamily: () => 0,
4833+
shouldShowAccountToast: () => true,
4834+
markToastShown: () => {},
4835+
setActiveIndex: () => null,
4836+
};
4837+
vi.spyOn(AccountManager, "loadFromDisk").mockResolvedValue(manager as never);
4838+
4839+
// Return a short cooldown (1s) but attempt=3, which is >= MAX_SHORT_RETRY_ATTEMPTS (3).
4840+
// This should fall through to rotation instead of short-retrying.
4841+
vi.mocked(fetchHelpersModule.handleErrorResponse).mockResolvedValueOnce({
4842+
response: new Response("rate limited", { status: 429 }),
4843+
rateLimit: {
4844+
retryAfterMs: 1000,
4845+
code: "rate_limit_exceeded",
4846+
},
4847+
errorBody: "rate limited",
4848+
} as never);
4849+
vi.mocked(rateLimitBackoffModule.getRateLimitBackoff).mockReturnValueOnce({
4850+
attempt: 3,
4851+
delayMs: 1000,
4852+
});
4853+
globalThis.fetch = vi
4854+
.fn()
4855+
.mockResolvedValueOnce(new Response("rate limited", { status: 429 }));
4856+
4857+
const mockClient = createMockClient();
4858+
const { OpenAIOAuthPlugin } = await import("../index.js");
4859+
const plugin = await OpenAIOAuthPlugin({ client: mockClient } as never) as unknown as PluginType;
4860+
const sdk = await plugin.auth.loader(getOAuthAuth, { options: {}, models: {} });
4861+
const response = await sdk.fetch!("https://api.openai.com/v1/chat/completions", {
4862+
method: "POST",
4863+
body: JSON.stringify({ model: "gpt-5.1" }),
4864+
});
4865+
4866+
// Should have rotated (503 returned as single-account pool exhausted)
4867+
// rather than short-retrying
4868+
expect(response.status).toBe(503);
4869+
expect(globalThis.fetch).toHaveBeenCalledTimes(1);
4870+
expect(markRateLimitedWithReason).toHaveBeenCalledWith(
4871+
expect.objectContaining({ index: 0 }),
4872+
1000,
4873+
"gpt-5.1",
4874+
expect.any(String),
4875+
"gpt-5.1",
4876+
);
4877+
expect(recordRateLimit).toHaveBeenCalled();
4878+
});
4879+
4880+
it("short-retries the same account when attempt is below MAX_SHORT_RETRY_ATTEMPTS", async () => {
4881+
const { AccountManager } = await import("../lib/accounts.js");
4882+
const fetchHelpersModule = await import("../lib/request/fetch-helpers.js");
4883+
const rateLimitBackoffModule = await import("../lib/request/rate-limit-backoff.js");
4884+
4885+
const markRateLimitedWithReason = vi.fn();
4886+
const manager = {
4887+
getAccountCount: () => 1,
4888+
getCurrentOrNextForFamilyHybrid: () => ({
4889+
index: 0,
4890+
accountId: "acc-1",
4891+
email: "alpha@example.com",
4892+
refreshToken: "refresh-1",
4893+
}),
4894+
getCurrentOrNextForFamily: () => ({
4895+
index: 0,
4896+
accountId: "acc-1",
4897+
email: "alpha@example.com",
4898+
refreshToken: "refresh-1",
4899+
}),
4900+
getCurrentWorkspace: () => null,
4901+
getAccountByIndex: () => null,
4902+
getAccountsSnapshot: () => [],
4903+
isAccountAvailableForFamily: () => true,
4904+
toAuthDetails: () => ({
4905+
type: "oauth" as const,
4906+
access: "access-token",
4907+
refresh: "refresh-1",
4908+
expires: Date.now() + 60_000,
4909+
}),
4910+
hasRefreshToken: () => true,
4911+
saveToDiskDebounced: () => {},
4912+
updateFromAuth: () => {},
4913+
clearAuthFailures: () => {},
4914+
incrementAuthFailures: () => 1,
4915+
saveToDisk: async () => {},
4916+
markAccountCoolingDown: () => {},
4917+
markRateLimited: () => {},
4918+
markRateLimitedWithReason,
4919+
consumeToken: () => true,
4920+
refundToken: () => {},
4921+
syncCodexCliActiveSelectionForIndex: async () => {},
4922+
markSwitched: () => {},
4923+
removeAccount: () => {},
4924+
recordFailure: () => {},
4925+
recordSuccess: () => {},
4926+
recordRateLimit: () => {},
4927+
getMinWaitTimeForFamily: () => 0,
4928+
shouldShowAccountToast: () => true,
4929+
markToastShown: () => {},
4930+
setActiveIndex: () => null,
4931+
};
4932+
vi.spyOn(AccountManager, "loadFromDisk").mockResolvedValue(manager as never);
4933+
4934+
// First request: 429 with attempt=2 (below MAX_SHORT_RETRY_ATTEMPTS=3) → short retry
4935+
// Second request: 200 OK
4936+
vi.mocked(fetchHelpersModule.handleErrorResponse).mockResolvedValueOnce({
4937+
response: new Response("rate limited", { status: 429 }),
4938+
rateLimit: {
4939+
retryAfterMs: 1000,
4940+
code: "rate_limit_exceeded",
4941+
},
4942+
errorBody: "rate limited",
4943+
} as never);
4944+
vi.mocked(rateLimitBackoffModule.getRateLimitBackoff).mockReturnValueOnce({
4945+
attempt: 2,
4946+
delayMs: 1000,
4947+
});
4948+
globalThis.fetch = vi
4949+
.fn()
4950+
.mockResolvedValueOnce(new Response("rate limited", { status: 429 }))
4951+
.mockResolvedValueOnce(new Response(JSON.stringify({ content: "ok" }), { status: 200 }));
4952+
4953+
const mockClient = createMockClient();
4954+
const { OpenAIOAuthPlugin } = await import("../index.js");
4955+
const plugin = await OpenAIOAuthPlugin({ client: mockClient } as never) as unknown as PluginType;
4956+
const sdk = await plugin.auth.loader(getOAuthAuth, { options: {}, models: {} });
4957+
4958+
vi.useFakeTimers();
4959+
const responsePromise = sdk.fetch!("https://api.openai.com/v1/chat/completions", {
4960+
method: "POST",
4961+
body: JSON.stringify({ model: "gpt-5.1" }),
4962+
});
4963+
await vi.advanceTimersByTimeAsync(2000);
4964+
const response = await responsePromise;
4965+
4966+
// Should have short-retried and succeeded
4967+
expect(response.status).toBe(200);
4968+
expect(globalThis.fetch).toHaveBeenCalledTimes(2);
4969+
// markRateLimitedWithReason should NOT have been called (no rotation)
4970+
expect(markRateLimitedWithReason).not.toHaveBeenCalled();
4971+
});
4972+
47834973
it("persists the longer parsed rate-limit cooldown across overlapping requests", async () => {
47844974
const { AccountManager } = await import("../lib/accounts.js");
47854975
const { AccountManager: ActualAccountManager } =

0 commit comments

Comments
 (0)