Skip to content

Commit ef997e1

Browse files
committed
fix: stop quota scheduler cooldown drift
The preemptive quota scheduler calculated deferrals from the nearest reset window. When a later primary or secondary reset was still active, that shortened the effective cooldown and let requests resume too early. Use the longest active reset window for 429 deferrals, and for near-exhaustion deferrals use the longest reset window among the quota windows that are themselves near exhaustion, so cooldowns stay aligned with the latest relevant quota window. Both waits remain capped at the scheduler's maximum deferral.
1 parent 00ec8ac commit ef997e1

2 files changed

Lines changed: 55 additions & 6 deletions

File tree

lib/preemptive-quota-scheduler.ts

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -238,14 +238,27 @@ export class PreemptiveQuotaScheduler {
238238
const snapshot = this.snapshots.get(key);
239239
if (!snapshot) return { defer: false, waitMs: 0 };
240240

241+
const primaryWait =
242+
typeof snapshot.primary.resetAtMs === "number" &&
243+
Number.isFinite(snapshot.primary.resetAtMs) &&
244+
snapshot.primary.resetAtMs > now
245+
? snapshot.primary.resetAtMs - now
246+
: 0;
247+
const secondaryWait =
248+
typeof snapshot.secondary.resetAtMs === "number" &&
249+
Number.isFinite(snapshot.secondary.resetAtMs) &&
250+
snapshot.secondary.resetAtMs > now
251+
? snapshot.secondary.resetAtMs - now
252+
: 0;
253+
241254
const waitCandidates = [snapshot.primary.resetAtMs, snapshot.secondary.resetAtMs]
242255
.filter((value): value is number => typeof value === "number" && Number.isFinite(value) && value > now)
243256
.map((value) => value - now)
244257
.filter((value) => value > 0);
245-
const nearestWait = waitCandidates.length > 0 ? Math.min(...waitCandidates) : 0;
258+
const longestWait = waitCandidates.length > 0 ? Math.max(...waitCandidates) : 0;
246259

247-
if (snapshot.status === 429 && nearestWait > 0) {
248-
const bounded = Math.min(nearestWait, this.maxDeferralMs);
260+
if (snapshot.status === 429 && longestWait > 0) {
261+
const bounded = Math.min(longestWait, this.maxDeferralMs);
249262
if (bounded > 0) {
250263
return { defer: true, waitMs: bounded, reason: "rate-limit" };
251264
}
@@ -259,9 +272,12 @@ export class PreemptiveQuotaScheduler {
259272
typeof snapshot.secondary.usedPercent === "number" &&
260273
Number.isFinite(snapshot.secondary.usedPercent) &&
261274
snapshot.secondary.usedPercent >= 100 - this.secondaryRemainingPercentThreshold;
262-
const nearExhausted = primaryNearExhausted || secondaryNearExhausted;
263-
if (nearExhausted && nearestWait > 0) {
264-
const bounded = Math.min(nearestWait, this.maxDeferralMs);
275+
const nearExhaustedWait = Math.max(
276+
primaryNearExhausted ? primaryWait : 0,
277+
secondaryNearExhausted ? secondaryWait : 0,
278+
);
279+
if (nearExhaustedWait > 0) {
280+
const bounded = Math.min(nearExhaustedWait, this.maxDeferralMs);
265281
if (bounded > 0) {
266282
return { defer: true, waitMs: bounded, reason: "quota-near-exhaustion" };
267283
}

test/preemptive-quota-scheduler.test.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,21 @@ describe("preemptive quota scheduler", () => {
8787
expect(decision.waitMs).toBe(25_000);
8888
});
8989

90+
it("uses the longest active reset window for 429 deferrals", () => {
91+
const scheduler = new PreemptiveQuotaScheduler();
92+
scheduler.update("acc:model", {
93+
status: 429,
94+
primary: { resetAtMs: 31_000 },
95+
secondary: { resetAtMs: 61_000 },
96+
updatedAt: 1_000,
97+
});
98+
99+
const decision = scheduler.getDeferral("acc:model", 6_000);
100+
expect(decision.defer).toBe(true);
101+
expect(decision.reason).toBe("rate-limit");
102+
expect(decision.waitMs).toBe(55_000);
103+
});
104+
90105
it("preserves secondary near-exhaustion state when marking a quota key rate-limited", () => {
91106
const scheduler = new PreemptiveQuotaScheduler({
92107
remainingPercentThresholdSecondary: 5,
@@ -141,6 +156,24 @@ describe("preemptive quota scheduler", () => {
141156
expect(decision.reason).toBe("quota-near-exhaustion");
142157
});
143158

159+
it("uses the longest near-exhausted reset window for quota deferrals", () => {
160+
const scheduler = new PreemptiveQuotaScheduler({
161+
remainingPercentThresholdPrimary: 5,
162+
remainingPercentThresholdSecondary: 5,
163+
});
164+
scheduler.update("acc:model", {
165+
status: 200,
166+
primary: { usedPercent: 96, resetAtMs: 70_000 },
167+
secondary: { usedPercent: 97, resetAtMs: 120_000 },
168+
updatedAt: 10_000,
169+
});
170+
171+
const decision = scheduler.getDeferral("acc:model", 20_000);
172+
expect(decision.defer).toBe(true);
173+
expect(decision.reason).toBe("quota-near-exhaustion");
174+
expect(decision.waitMs).toBe(100_000);
175+
});
176+
144177
it("prunes expired snapshots", () => {
145178
const scheduler = new PreemptiveQuotaScheduler();
146179
scheduler.update("a", {

0 commit comments

Comments
 (0)