fix: notify quota scheduler on 502/503/529 overload responses

ndycode · ndycode · commit 71645a542c57 · 2026-04-06T11:31:17.000+08:00
The 5xx handler rotates accounts and applies the failure policy, but it
does not inform the preemptiveQuotaScheduler about upstream capacity
pressure. As a result, the scheduler keeps sending requests into an
overloaded backend, even though the 429 handler already calls
markRateLimited().

Notify the scheduler for overload-specific status codes (502 Bad
Gateway, 503 Service Unavailable, 529 Overloaded) inside the 5xx
handler, mirroring the 429 handler's scheduler awareness. The handler
calls markRateLimited() with the failure policy's cooldownMs, and only
when that cooldown is a number. Generic 500 errors are excluded since
they do not indicate capacity pressure.
diff --git a/index.ts b/index.ts
@@ -1809,16 +1809,32 @@ let sessionAffinityWriteVersion = 0;
 													const serverRetryAfterMs = parseRetryAfterHintMs(
 														response.headers,
 													);
-													const policy = evaluateFailurePolicy(
-														{
-															kind: "server",
-															failoverMode,
-															serverRetryAfterMs:
-																serverRetryAfterMs ?? undefined,
-														},
-														{ serverCooldownMs: serverErrorCooldownMs },
-													);
-													if (policy.refundToken) {
+							const policy = evaluateFailurePolicy(
+								{
+									kind: "server",
+									failoverMode,
+									serverRetryAfterMs:
+										serverRetryAfterMs ?? undefined,
+								},
+								{ serverCooldownMs: serverErrorCooldownMs },
+							);
+							// Overload-type server errors (502 Bad Gateway, 503 Service
+							// Unavailable, 529 Overloaded) signal upstream capacity
+							// pressure. Notify the quota scheduler so it can proactively
+							// defer subsequent requests for this quota key, mirroring the
+							// 429 handler's scheduler awareness.
+							if (
+								(response.status === 502 ||
+									response.status === 503 ||
+									response.status === 529) &&
+								typeof policy.cooldownMs === "number"
+							) {
+								preemptiveQuotaScheduler.markRateLimited(
+									quotaScheduleKey,
+									policy.cooldownMs,
+								);
+							}
+							if (policy.refundToken) {
 														accountManager.refundToken(
 															account,
 															modelFamily,
diff --git a/test/index.test.ts b/test/index.test.ts
@@ -2655,6 +2655,94 @@ describe("OpenAIOAuthPlugin fetch handler", () => {
 		dateNowSpy.mockRestore();
 	});
 
+	it("notifies preemptive quota scheduler on 503 overload responses", async () => {
+		const { PreemptiveQuotaScheduler } = await import(
+			"../lib/preemptive-quota-scheduler.js"
+		);
+		const schedulerSpy = vi.spyOn(
+			PreemptiveQuotaScheduler.prototype,
+			"markRateLimited",
+		);
+		globalThis.fetch = vi.fn().mockResolvedValue(
+			new Response("service unavailable", { status: 503 }),
+		);
+
+		const { sdk } = await setupPlugin();
+		await sdk.fetch!("https://api.openai.com/v1/chat", {
+			method: "POST",
+			body: JSON.stringify({ model: "gpt-5.1" }),
+		});
+
+		expect(schedulerSpy).toHaveBeenCalled();
+		schedulerSpy.mockRestore();
+	});
+
+	it("notifies preemptive quota scheduler on 502 overload responses", async () => {
+		const { PreemptiveQuotaScheduler } = await import(
+			"../lib/preemptive-quota-scheduler.js"
+		);
+		const schedulerSpy = vi.spyOn(
+			PreemptiveQuotaScheduler.prototype,
+			"markRateLimited",
+		);
+		globalThis.fetch = vi.fn().mockResolvedValue(
+			new Response("bad gateway", { status: 502 }),
+		);
+
+		const { sdk } = await setupPlugin();
+		await sdk.fetch!("https://api.openai.com/v1/chat", {
+			method: "POST",
+			body: JSON.stringify({ model: "gpt-5.1" }),
+		});
+
+		expect(schedulerSpy).toHaveBeenCalled();
+		schedulerSpy.mockRestore();
+	});
+
+	it("notifies preemptive quota scheduler on 529 overload responses", async () => {
+		const { PreemptiveQuotaScheduler } = await import(
+			"../lib/preemptive-quota-scheduler.js"
+		);
+		const schedulerSpy = vi.spyOn(
+			PreemptiveQuotaScheduler.prototype,
+			"markRateLimited",
+		);
+		globalThis.fetch = vi.fn().mockResolvedValue(
+			new Response("overloaded", { status: 529 }),
+		);
+
+		const { sdk } = await setupPlugin();
+		await sdk.fetch!("https://api.openai.com/v1/chat", {
+			method: "POST",
+			body: JSON.stringify({ model: "gpt-5.1" }),
+		});
+
+		expect(schedulerSpy).toHaveBeenCalled();
+		schedulerSpy.mockRestore();
+	});
+
+	it("does not notify preemptive quota scheduler on generic 500 server errors", async () => {
+		const { PreemptiveQuotaScheduler } = await import(
+			"../lib/preemptive-quota-scheduler.js"
+		);
+		const schedulerSpy = vi.spyOn(
+			PreemptiveQuotaScheduler.prototype,
+			"markRateLimited",
+		);
+		globalThis.fetch = vi.fn().mockResolvedValue(
+			new Response("internal server error", { status: 500 }),
+		);
+
+		const { sdk } = await setupPlugin();
+		await sdk.fetch!("https://api.openai.com/v1/chat", {
+			method: "POST",
+			body: JSON.stringify({ model: "gpt-5.1" }),
+		});
+
+		expect(schedulerSpy).not.toHaveBeenCalled();
+		schedulerSpy.mockRestore();
+	});
+
 	it("falls back from gpt-5.3-codex to gpt-5.2-codex when unsupported fallback is enabled", async () => {
 		const configModule = await import("../lib/config.js");
 		const fetchHelpers = await import("../lib/request/fetch-helpers.js");