feat: add retryWithBackoff (7 attempts) and RequestQueue throttling (1s + jitter)

luanweslley77 · luanweslley77 · commit 5ac70f3e8a8b · 2026-03-15T22:34:11.000-03:00
- Implement retryWithBackoff with exponential backoff and +/- 30% jitter
- Support for Retry-After header from server
- RequestQueue with 1s minimum interval + random jitter (0.5-1.5s)
- Prevents hitting the 60 req/min rate limit
- Up to 7 retry attempts for transient errors (429, 5xx)
diff --git a/README.md b/README.md
@@ -25,6 +25,9 @@
 - 🎯 **Rate Limit Fix** - Official headers prevent aggressive rate limiting (Fixes #4)
 - 🔍 **Session Tracking** - Unique session/prompt IDs for proper quota recognition
 - 🎯 **Aligned with qwen-code** - Exposes same models as official Qwen Code CLI
+- ⏱️ **Request Throttling** - 1-2.5s intervals between requests (prevents hitting the 60 req/min limit)
+- 🔄 **Automatic Retry** - Exponential backoff with jitter for 429/5xx errors (up to 7 attempts)
+- 📡 **Retry-After Support** - Respects server's Retry-After header when rate limited
 
 ## 🆕 What's New in v1.5.0
 
@@ -40,6 +43,21 @@
 
 **Result:** Full daily quota now available without premature rate limiting.
 
+### Automatic Retry & Throttling (v1.5.0+)
+
+**Request Throttling:**
+- Minimum 1 second interval between requests
+- Additional 0.5-1.5s random jitter whenever a request has to wait (more human-like)
+- Prevents hitting the 60 req/min limit
+
+**Automatic Retry:**
+- Up to 7 retry attempts for transient errors
+- Exponential backoff with +/- 30% jitter
+- Respects `Retry-After` header from server
+- Retries on 429 (rate limit) and 5xx (server errors)
+
+**Result:** Smoother request flow and automatic recovery from rate limiting.
+
 ### Dynamic API Endpoint Resolution
 
 The plugin now automatically detects and uses the correct API endpoint based on the `resource_url` returned by the OAuth server:
diff --git a/README.pt-BR.md b/README.pt-BR.md
@@ -25,6 +25,9 @@
 - 🎯 **Correção de Rate Limit** - Headers oficiais previnem rate limiting agressivo (Fix #4)
 - 🔍 **Session Tracking** - IDs únicos de sessão/prompt para reconhecimento de cota
 - 🎯 **Alinhado com qwen-code** - Expõe os mesmos modelos do Qwen Code CLI oficial
+- ⏱️ **Throttling de Requisições** - Intervalos de 1-2.5s entre requisições (evita atingir o limite de 60 req/min)
+- 🔄 **Retry Automático** - Backoff exponencial com jitter para erros 429/5xx (até 7 tentativas)
+- 📡 **Suporte a Retry-After** - Respeita header Retry-After do servidor quando rate limited
 
 ## 📋 Pré-requisitos
 
diff --git a/src/index.ts b/src/index.ts
@@ -12,6 +12,7 @@ import { spawn } from 'node:child_process';
 
 import { QWEN_PROVIDER_ID, QWEN_API_CONFIG, QWEN_MODELS, QWEN_OFFICIAL_HEADERS } from './constants.js';
 import type { QwenCredentials } from './types.js';
+import type { HttpError } from './utils/retry.js';
 import { saveCredentials, loadCredentials, resolveBaseUrl } from './plugin/auth.js';
 import {
   generatePKCE,
@@ -22,10 +23,15 @@ import {
   SlowDownError,
 } from './qwen/oauth.js';
 import { logTechnicalDetail } from './errors.js';
+import { retryWithBackoff } from './utils/retry.js';
+import { RequestQueue } from './plugin/request-queue.js';
 
 // Global session ID for the plugin lifetime
 const PLUGIN_SESSION_ID = crypto.randomUUID();
 
+// Singleton request queue for throttling (shared across all requests)
+const requestQueue = new RequestQueue();
+
 // ============================================
 // Helpers
 // ============================================
@@ -108,7 +114,48 @@ export const QwenAuthPlugin = async (_input: unknown) => {
               promptId: crypto.randomUUID(),
               source: 'opencode-qwencode-auth'
             })
-          }
+          },
+          // Custom fetch: every call goes through the shared requestQueue, and the retryWithBackoff loop runs inside that single enqueue call
+          fetch: async (url: string, options?: RequestInit) => {
+            return requestQueue.enqueue(async () => {
+              return retryWithBackoff(
+                async () => {
+                  // Headers are rebuilt each time this callback runs, so every run carries a fresh promptId
+                  const headers = new Headers(options?.headers);
+                  headers.set('Authorization', `Bearer ${accessToken}`);
+                  headers.set(
+                    'X-Metadata',
+                    JSON.stringify({
+                      sessionId: PLUGIN_SESSION_ID,
+                      promptId: crypto.randomUUID(),
+                      source: 'opencode-qwencode-auth',
+                    })
+                  );
+
+                  const response = await fetch(url, {
+                    ...options,
+                    headers,
+                  });
+
+                  if (!response.ok) { // any non-2xx status is turned into a thrown Error carrying the status code
+                    const errorText = await response.text().catch(() => ''); // body is read here; a failed read falls back to ''
+                    const error = new Error(`HTTP ${response.status}: ${errorText}`) as HttpError & { status?: number };
+                    error.status = response.status;
+                    (error as any).response = response; // NOTE(review): this Response's body was already consumed by text() above
+                    throw error;
+                  }
+
+                  return response;
+                },
+                {
+                  authType: 'qwen-oauth',
+                  maxAttempts: 7,
+                  initialDelayMs: 1500,
+                  maxDelayMs: 30000,
+                }
+              );
+            });
+          },
         };
       },
 
diff --git a/src/plugin/request-queue.ts b/src/plugin/request-queue.ts
@@ -0,0 +1,46 @@
+/**
+ * Request Queue with throttling
+ * Prevents hitting rate limits by controlling request frequency
+ * Inspired by qwen-code-0.12.0 throttling patterns
+ */
+
+import { createDebugLogger } from '../utils/debug-logger.js';
+
+const debugLogger = createDebugLogger('REQUEST_QUEUE');
+
+export class RequestQueue {
+  private lastRequestTime = 0; // Date.now() value stored right before the most recent fn() call; 0 until the first call
+  private readonly MIN_INTERVAL = 1000; // minimum gap after lastRequestTime, in ms (1 second)
+  private readonly JITTER_MIN = 500;    // lower bound of the extra random delay, in ms (0.5s)
+  private readonly JITTER_MAX = 1500;   // upper bound of the extra random delay, in ms (1.5s)
+
+  /**
+   * Get a random delay in milliseconds between JITTER_MIN and JITTER_MAX
+   */
+  private getJitter(): number {
+    return Math.random() * (this.JITTER_MAX - this.JITTER_MIN) + this.JITTER_MIN;
+  }
+
+  /**
+   * Run fn and return its promise. If less than MIN_INTERVAL has passed since lastRequestTime, first sleep for the remainder plus random jitter; otherwise run at once with no jitter.
+   * NOTE(review): lastRequestTime is only written after the sleep, so overlapping callers compute their wait from the same timestamp and are not serialized — confirm this is intended.
+   */
+  async enqueue<T>(fn: () => Promise<T>): Promise<T> {
+    const elapsed = Date.now() - this.lastRequestTime;
+    const waitTime = Math.max(0, this.MIN_INTERVAL - elapsed);
+    
+    if (waitTime > 0) {
+      const jitter = this.getJitter();
+      const totalWait = waitTime + jitter;
+      
+      debugLogger.info(
+        `Throttling: waiting ${totalWait.toFixed(0)}ms (${waitTime.toFixed(0)}ms + ${jitter.toFixed(0)}ms jitter)`
+      );
+      
+      await new Promise(resolve => setTimeout(resolve, totalWait));
+    }
+    
+    this.lastRequestTime = Date.now();
+    return fn();
+  }
+}
diff --git a/src/qwen/oauth.ts b/src/qwen/oauth.ts
@@ -10,6 +10,7 @@ import { randomBytes, createHash, randomUUID } from 'node:crypto';
 import { QWEN_OAUTH_CONFIG } from '../constants.js';
 import type { QwenCredentials } from '../types.js';
 import { QwenAuthError, logTechnicalDetail } from '../errors.js';
+import { retryWithBackoff, getErrorStatus } from '../utils/retry.js';
 
 /**
  * Erro lançado quando o servidor pede slow_down (RFC 8628)
@@ -178,6 +179,7 @@ export function tokenResponseToCredentials(tokenResponse: TokenResponse): QwenCr
 
 /**
  * Refresh the access token using refresh_token grant
+ * Includes automatic retry for transient errors (429, 5xx)
  */
 export async function refreshAccessToken(refreshToken: string): Promise<QwenCredentials> {
   const bodyData = {
@@ -186,31 +188,55 @@ export async function refreshAccessToken(refreshToken: string): Promise<QwenCred
     client_id: QWEN_OAUTH_CONFIG.clientId,
   };
 
-  const response = await fetch(QWEN_OAUTH_CONFIG.tokenEndpoint, {
-    method: 'POST',
-    headers: {
-      'Content-Type': 'application/x-www-form-urlencoded',
-      Accept: 'application/json',
-    },
-    body: objectToUrlEncoded(bodyData),
-  });
-
-  if (!response.ok) {
-    const errorText = await response.text();
-    logTechnicalDetail(`Token refresh HTTP ${response.status}: ${errorText}`);
-    throw new QwenAuthError('refresh_failed', `HTTP ${response.status}: ${errorText}`);
-  }
+  return retryWithBackoff(
+    async () => {
+      const response = await fetch(QWEN_OAUTH_CONFIG.tokenEndpoint, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/x-www-form-urlencoded',
+          Accept: 'application/json',
+        },
+        body: objectToUrlEncoded(bodyData),
+      });
+
+      if (!response.ok) {
+        const errorText = await response.text();
+        logTechnicalDetail(`Token refresh HTTP ${response.status}: ${errorText}`);
+        
+        // Don't retry on invalid_grant (refresh token expired/revoked)
+        if (errorText.includes('invalid_grant')) {
+          throw new QwenAuthError('invalid_grant', 'Refresh token expired or revoked');
+        }
+        
+        throw new QwenAuthError('refresh_failed', `HTTP ${response.status}: ${errorText}`);
+      }
 
-  const data = await response.json() as TokenResponse;
+      const data = await response.json() as TokenResponse;
 
-  return {
-    accessToken: data.access_token,
-    tokenType: data.token_type || 'Bearer',
-    refreshToken: data.refresh_token || refreshToken,
-    resourceUrl: data.resource_url,
-    expiryDate: Date.now() + data.expires_in * 1000,
-    scope: data.scope,
-  };
+      return {
+        accessToken: data.access_token,
+        tokenType: data.token_type || 'Bearer',
+        refreshToken: data.refresh_token || refreshToken,
+        resourceUrl: data.resource_url,
+        expiryDate: Date.now() + data.expires_in * 1000,
+        scope: data.scope,
+      };
+    },
+    {
+      maxAttempts: 5,
+      initialDelayMs: 1000,
+      maxDelayMs: 15000,
+      shouldRetryOnError: (error) => {
+        // Don't retry on invalid_grant errors
+        if (error.message.includes('invalid_grant')) {
+          return false;
+        }
+        // Retry on 429 or 5xx errors
+        const status = getErrorStatus(error);
+        return status === 429 || (status !== undefined && status >= 500 && status < 600);
+      },
+    }
+  );
 }
 
 /**
diff --git a/src/utils/debug-logger.ts b/src/utils/debug-logger.ts
@@ -0,0 +1,40 @@
+/**
+ * Debug logger utility
+ * Only outputs when OPENCODE_QWEN_DEBUG=1 is set
+ */
+
+const DEBUG_ENABLED = process.env.OPENCODE_QWEN_DEBUG === '1'; // evaluated once at module load; only the exact string '1' enables output
+
+export interface DebugLogger { // logger shape returned by createDebugLogger: four level methods with the same signature
+  info: (message: string, ...args: unknown[]) => void;
+  warn: (message: string, ...args: unknown[]) => void;
+  error: (message: string, ...args: unknown[]) => void;
+  debug: (message: string, ...args: unknown[]) => void;
+}
+
+export function createDebugLogger(prefix: string): DebugLogger { // builds a logger whose lines start with `[prefix]`; every method is a no-op unless DEBUG_ENABLED is true
+  const logPrefix = `[${prefix}]`;
+
+  return {
+    info: (message: string, ...args: unknown[]) => { // console.log with an [INFO] tag; extra args are passed through
+      if (DEBUG_ENABLED) {
+        console.log(`${logPrefix} [INFO] ${message}`, ...args);
+      }
+    },
+    warn: (message: string, ...args: unknown[]) => { // console.warn with a [WARN] tag
+      if (DEBUG_ENABLED) {
+        console.warn(`${logPrefix} [WARN] ${message}`, ...args);
+      }
+    },
+    error: (message: string, ...args: unknown[]) => { // console.error with an [ERROR] tag
+      if (DEBUG_ENABLED) {
+        console.error(`${logPrefix} [ERROR] ${message}`, ...args);
+      }
+    },
+    debug: (message: string, ...args: unknown[]) => { // uses console.log (not console.debug) with a [DEBUG] tag
+      if (DEBUG_ENABLED) {
+        console.log(`${logPrefix} [DEBUG] ${message}`, ...args);
+      }
+    },
+  };
+}
diff --git a/src/utils/retry.ts b/src/utils/retry.ts
diff --git a/tests/debug.ts b/tests/debug.ts