Skip to content

Commit 7713f99

Browse files
committed
The fix: after any platform that actually ran searches, pause 65 seconds before starting the next platform (platforms served from cache do not trigger the pause). This gives the search API's 30-requests-per-minute limit time to reset fully.
1 parent 74c2a6c commit 7713f99

1 file changed

Lines changed: 15 additions & 4 deletions

File tree

scripts/fetch_all_categories.py

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -911,26 +911,28 @@ async def process_category(client: GitHubClient, category: str, fetch_fn, timest
911911
num_platforms = len(PLATFORMS)
912912

913913
async def process_platform(platform: str, budget: int):
914+
"""Fetch and save repos for one platform. Returns True if searches were run."""
914915
cached = load_cache(category, platform)
915916
if cached:
916-
return
917+
return False
917918
repos = await fetch_fn(client, platform, budget)
918919

919920
# Never save 0 repos — preserve whatever exists in cache
920921
if len(repos) == 0:
921922
existing = _load_existing_count(category, platform)
922923
print(f" ⚠ 0 repos fetched — skipping save (existing cache: {existing} repos)")
923-
return
924+
return True
924925

925926
# Don't overwrite good cached data with poor results
926927
min_threshold = 10 if category == "new-releases" else 30
927928
if len(repos) < min_threshold:
928929
existing = _load_existing_count(category, platform)
929930
if existing >= min_threshold:
930931
print(f" ⚠ Only {len(repos)} repos fetched but cache has {existing} — keeping cached data")
931-
return
932+
return True
932933

933934
save_data(category, platform, repos, timestamp)
935+
return True
934936

935937
# Compute per-platform budget: divide remaining requests evenly
936938
remaining = client._rate_remaining
@@ -939,12 +941,21 @@ async def process_platform(platform: str, budget: int):
939941

940942
# Process platforms SEQUENTIALLY to avoid rate-limit thrashing.
941943
# The release cache still benefits later platforms from earlier ones.
944+
# Pause between platforms so the search API rate limit (30 req/min) can reset.
942945
platforms_left = list(PLATFORMS.keys())
946+
prev_ran_searches = False
943947
for i, p in enumerate(platforms_left):
944948
# Recalculate budget for remaining platforms so unused budget carries forward
945949
platforms_remaining = num_platforms - i
946950
budget = max((client._rate_remaining - RATE_LIMIT_FLOOR) // platforms_remaining, 100)
947-
await process_platform(p, budget)
951+
952+
# Wait for search rate limit (30 req/min) to reset between platforms
953+
if prev_ran_searches:
954+
print(f" ⏳ Waiting 65s for search API rate limit reset...")
955+
await asyncio.sleep(65)
956+
957+
ran = await process_platform(p, budget)
958+
prev_ran_searches = ran
948959

949960

950961
async def main():

0 commit comments

Comments
 (0)