|
11 | 11 | import asyncio |
12 | 12 | import aiohttp |
13 | 13 | import time |
| 14 | +import collections |
14 | 15 | from datetime import datetime, timedelta, timezone |
15 | 16 | from typing import List, Dict, Optional, Tuple, Set |
16 | 17 | from dataclasses import dataclass, field |
@@ -55,6 +56,8 @@ def get_token(category: str) -> str: |
55 | 56 | REQUEST_TIMEOUT = 20 # seconds |
56 | 57 | MAX_RETRIES = 3 |
57 | 58 | RATE_LIMIT_FLOOR = 50 # stop verifying when remaining requests drop below this |
| 59 | +SEARCH_RATE_LIMIT = 28 # max search API calls per window (GitHub allows 30, leave buffer) |
| 60 | +SEARCH_RATE_WINDOW = 60 # sliding window in seconds |
58 | 61 | FORCE_REFRESH = os.environ.get("FORCE_REFRESH", "").lower() in ("true", "1", "yes") |
59 | 62 |
|
60 | 63 | # Topics / keywords that indicate NSFW or inappropriate content. |
@@ -244,6 +247,9 @@ def __init__(self, token: str): |
244 | 247 | self._request_count = 0 |
245 | 248 | self._rate_remaining = 5000 |
246 | 249 | self._rate_reset: Optional[float] = None |
| 250 | + # Search API rate limiter (sliding window) |
| 251 | + self._search_call_times: collections.deque = collections.deque() |
| 252 | + self._search_rate_lock = asyncio.Lock() |
247 | 253 | # Cross-repo release cache: full_name -> ReleaseInfo |
248 | 254 | self.release_cache: Dict[str, ReleaseInfo] = {} |
249 | 255 |
|
@@ -282,8 +288,24 @@ def _update_rate_info(self, headers, url: str): |
282 | 288 | if reset is not None: |
283 | 289 | self._rate_reset = float(reset) |
284 | 290 |
|
| 291 | + async def _acquire_search_slot(self): |
| 292 | +        """Block until a search API call is allowed (SEARCH_RATE_LIMIT calls per SEARCH_RATE_WINDOW seconds)."""
| 293 | + async with self._search_rate_lock: |
| 294 | + now = time.time() |
| 295 | + while self._search_call_times and now - self._search_call_times[0] >= SEARCH_RATE_WINDOW: |
| 296 | + self._search_call_times.popleft() |
| 297 | + if len(self._search_call_times) >= SEARCH_RATE_LIMIT: |
| 298 | + oldest = self._search_call_times[0] |
| 299 | + wait = SEARCH_RATE_WINDOW - (now - oldest) + 1 |
| 300 | + if wait > 0: |
| 301 | +                print(f" ⏳ Search rate limit: {len(self._search_call_times)}/{SEARCH_RATE_LIMIT} in last {SEARCH_RATE_WINDOW}s, pausing {wait:.0f}s")
| 302 | + await asyncio.sleep(wait) |
| 303 | + self._search_call_times.append(time.time()) |
| 304 | + |
285 | 305 | async def get(self, url: str, params: Optional[Dict] = None) -> Tuple[Optional[Dict], Optional[str]]: |
286 | 306 | """GET with retry, rate-limit handling, and concurrency control.""" |
| 307 | + if "/search/" in url: |
| 308 | + await self._acquire_search_slot() |
287 | 309 | async with self._sem: |
288 | 310 | await self._wait_for_rate_limit() |
289 | 311 | for attempt in range(MAX_RETRIES): |
@@ -333,6 +355,7 @@ async def search_repos(self, query: str, sort: str = "stars", order: str = "desc |
333 | 355 | params={"q": query, "sort": sort, "order": order, "per_page": 100, "page": 1}, |
334 | 356 | ) |
335 | 357 | if not data: |
| 358 | + print(f" ⚠ Search query failed: {err}") |
336 | 359 | return [] |
337 | 360 |
|
338 | 361 | items = data.get("items", []) |
@@ -941,21 +964,14 @@ async def process_platform(platform: str, budget: int): |
941 | 964 |
|
942 | 965 | # Process platforms SEQUENTIALLY to avoid rate-limit thrashing. |
943 | 966 | # The release cache still benefits later platforms from earlier ones. |
944 | | - # Pause between platforms so the search API rate limit (30 req/min) can reset. |
| 967 | + # Search API pacing is handled by _acquire_search_slot() (sliding window rate limiter). |
945 | 968 | platforms_left = list(PLATFORMS.keys()) |
946 | | - prev_ran_searches = False |
947 | 969 | for i, p in enumerate(platforms_left): |
948 | 970 | # Recalculate budget for remaining platforms so unused budget carries forward |
949 | 971 | platforms_remaining = num_platforms - i |
950 | 972 | budget = max((client._rate_remaining - RATE_LIMIT_FLOOR) // platforms_remaining, 100) |
951 | 973 |
|
952 | | - # Wait for search rate limit (30 req/min) to reset between platforms |
953 | | - if prev_ran_searches: |
954 | | - print(f" ⏳ Waiting 65s for search API rate limit reset...") |
955 | | - await asyncio.sleep(65) |
956 | | - |
957 | | - ran = await process_platform(p, budget) |
958 | | - prev_ran_searches = ran |
| 974 | + await process_platform(p, budget) |
959 | 975 |
|
960 | 976 |
|
961 | 977 | async def main(): |
|
0 commit comments