Skip to content

Commit 71645a5

Browse files
committed
fix: notify quota scheduler on 502/503/529 overload responses
The 5xx handler rotates accounts and applies the failure policy, but it does not inform the preemptiveQuotaScheduler about upstream capacity pressure. As a result, the scheduler keeps sending requests to an overloaded backend, even though the 429 handler already calls markRateLimited(). Add a scheduler notification for overload-specific status codes (502 Bad Gateway, 503 Service Unavailable, 529 Overloaded) inside the 5xx handler, mirroring the 429 handler's scheduler awareness; the notification is sent only when the failure policy yields a numeric cooldownMs, which is passed to markRateLimited() together with the quota schedule key. Generic 500 errors are excluded because they do not indicate capacity pressure.
1 parent ef997e1 commit 71645a5

2 files changed

Lines changed: 114 additions & 10 deletions

File tree

index.ts

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1809,16 +1809,32 @@ let sessionAffinityWriteVersion = 0;
18091809
const serverRetryAfterMs = parseRetryAfterHintMs(
18101810
response.headers,
18111811
);
1812-
const policy = evaluateFailurePolicy(
1813-
{
1814-
kind: "server",
1815-
failoverMode,
1816-
serverRetryAfterMs:
1817-
serverRetryAfterMs ?? undefined,
1818-
},
1819-
{ serverCooldownMs: serverErrorCooldownMs },
1820-
);
1821-
if (policy.refundToken) {
1812+
const policy = evaluateFailurePolicy(
1813+
{
1814+
kind: "server",
1815+
failoverMode,
1816+
serverRetryAfterMs:
1817+
serverRetryAfterMs ?? undefined,
1818+
},
1819+
{ serverCooldownMs: serverErrorCooldownMs },
1820+
);
1821+
// Overload-type server errors (502 Bad Gateway, 503 Service
1822+
// Unavailable, 529 Overloaded) signal upstream capacity
1823+
// pressure. Notify the quota scheduler so it can proactively
1824+
// defer subsequent requests for this quota key, mirroring the
1825+
// 429 handler's scheduler awareness.
1826+
if (
1827+
(response.status === 502 ||
1828+
response.status === 503 ||
1829+
response.status === 529) &&
1830+
typeof policy.cooldownMs === "number"
1831+
) {
1832+
preemptiveQuotaScheduler.markRateLimited(
1833+
quotaScheduleKey,
1834+
policy.cooldownMs,
1835+
);
1836+
}
1837+
if (policy.refundToken) {
18221838
accountManager.refundToken(
18231839
account,
18241840
modelFamily,

test/index.test.ts

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2655,6 +2655,94 @@ describe("OpenAIOAuthPlugin fetch handler", () => {
26552655
dateNowSpy.mockRestore();
26562656
});
26572657

2658+
it("notifies preemptive quota scheduler on 503 overload responses", async () => {
2659+
const { PreemptiveQuotaScheduler } = await import(
2660+
"../lib/preemptive-quota-scheduler.js"
2661+
);
2662+
const schedulerSpy = vi.spyOn(
2663+
PreemptiveQuotaScheduler.prototype,
2664+
"markRateLimited",
2665+
);
2666+
globalThis.fetch = vi.fn().mockResolvedValue(
2667+
new Response("service unavailable", { status: 503 }),
2668+
);
2669+
2670+
const { sdk } = await setupPlugin();
2671+
await sdk.fetch!("https://api.openai.com/v1/chat", {
2672+
method: "POST",
2673+
body: JSON.stringify({ model: "gpt-5.1" }),
2674+
});
2675+
2676+
expect(schedulerSpy).toHaveBeenCalled();
2677+
schedulerSpy.mockRestore();
2678+
});
2679+
2680+
it("notifies preemptive quota scheduler on 502 overload responses", async () => {
2681+
const { PreemptiveQuotaScheduler } = await import(
2682+
"../lib/preemptive-quota-scheduler.js"
2683+
);
2684+
const schedulerSpy = vi.spyOn(
2685+
PreemptiveQuotaScheduler.prototype,
2686+
"markRateLimited",
2687+
);
2688+
globalThis.fetch = vi.fn().mockResolvedValue(
2689+
new Response("bad gateway", { status: 502 }),
2690+
);
2691+
2692+
const { sdk } = await setupPlugin();
2693+
await sdk.fetch!("https://api.openai.com/v1/chat", {
2694+
method: "POST",
2695+
body: JSON.stringify({ model: "gpt-5.1" }),
2696+
});
2697+
2698+
expect(schedulerSpy).toHaveBeenCalled();
2699+
schedulerSpy.mockRestore();
2700+
});
2701+
2702+
it("notifies preemptive quota scheduler on 529 overload responses", async () => {
2703+
const { PreemptiveQuotaScheduler } = await import(
2704+
"../lib/preemptive-quota-scheduler.js"
2705+
);
2706+
const schedulerSpy = vi.spyOn(
2707+
PreemptiveQuotaScheduler.prototype,
2708+
"markRateLimited",
2709+
);
2710+
globalThis.fetch = vi.fn().mockResolvedValue(
2711+
new Response("overloaded", { status: 529 }),
2712+
);
2713+
2714+
const { sdk } = await setupPlugin();
2715+
await sdk.fetch!("https://api.openai.com/v1/chat", {
2716+
method: "POST",
2717+
body: JSON.stringify({ model: "gpt-5.1" }),
2718+
});
2719+
2720+
expect(schedulerSpy).toHaveBeenCalled();
2721+
schedulerSpy.mockRestore();
2722+
});
2723+
2724+
it("does not notify preemptive quota scheduler on generic 500 server errors", async () => {
2725+
const { PreemptiveQuotaScheduler } = await import(
2726+
"../lib/preemptive-quota-scheduler.js"
2727+
);
2728+
const schedulerSpy = vi.spyOn(
2729+
PreemptiveQuotaScheduler.prototype,
2730+
"markRateLimited",
2731+
);
2732+
globalThis.fetch = vi.fn().mockResolvedValue(
2733+
new Response("internal server error", { status: 500 }),
2734+
);
2735+
2736+
const { sdk } = await setupPlugin();
2737+
await sdk.fetch!("https://api.openai.com/v1/chat", {
2738+
method: "POST",
2739+
body: JSON.stringify({ model: "gpt-5.1" }),
2740+
});
2741+
2742+
expect(schedulerSpy).not.toHaveBeenCalled();
2743+
schedulerSpy.mockRestore();
2744+
});
2745+
26582746
it("falls back from gpt-5.3-codex to gpt-5.2-codex when unsupported fallback is enabled", async () => {
26592747
const configModule = await import("../lib/config.js");
26602748
const fetchHelpers = await import("../lib/request/fetch-helpers.js");

0 commit comments

Comments
 (0)