Skip to content

Commit d5011aa

Browse files
rainxchzed and claude committed
Improve platform installer detection and protect cache from empty results
Match release assets by platform keywords (macos, darwin, win64, linux) in generic archives (.zip, .tar.gz), not just by installer extension. This catches cross-platform apps that ship as, e.g., myapp-macos-arm64.zip.

Also, never overwrite cached data when a fetch returns 0 repos.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent d038514 commit d5011aa

1 file changed

Lines changed: 39 additions & 17 deletions

File tree

scripts/fetch_all_categories.py

Lines changed: 39 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,19 @@ def _is_blocked(repo: Dict) -> bool:
131131
},
132132
}
133133

134+
# Keyword patterns in release asset filenames that indicate a platform build.
135+
# Matched against lowercased asset names for files ending in .zip / .tar.gz / .7z etc.
136+
# These catch cross-platform releases like "myapp-macos-arm64.zip" or "myapp-win-x64.zip"
137+
PLATFORM_ASSET_KEYWORDS = {
138+
"android": ["android"],
139+
"windows": ["win64", "win32", "windows", "-win-", "-win.", "win-x64", "win-arm64", "windows-x64"],
140+
"macos": ["macos", "darwin", "osx", "mac-x64", "mac-arm64", "mac-universal", "-mac-", "-mac."],
141+
"linux": ["linux", "linux-x64", "linux-arm64", "-linux-", "-linux."],
142+
}
143+
144+
# Generic archive extensions to check for platform keyword matching
145+
_ARCHIVE_EXTENSIONS = (".zip", ".tar.gz", ".tar.xz", ".tar.bz2", ".7z")
146+
134147
# ─── Data classes ──────────────────────────────────────────────────────────────
135148

136149

@@ -351,19 +364,30 @@ async def get_latest_stable_release(self, owner: str, repo: str) -> ReleaseInfo:
351364

352365
info = ReleaseInfo()
353366

354-
# Try /releases/latest first (one API call)
355-
data, err = await self.get(f"https://api.github.com/repos/{full_name}/releases/latest")
356-
if data and not data.get("draft") and not data.get("prerelease"):
357-
info.has_release = True
358-
info.published_at = data.get("published_at")
359-
assets = data.get("assets", [])
360-
# Pre-check all platforms
367+
def _check_assets(assets: List[Dict]):
    """Mark platforms in ``info.has_installers`` that have a matching asset.

    ``info`` comes from the enclosing scope. A platform that is already
    flagged is skipped, so calling this again never clears an earlier hit.

    Args:
        assets: Release asset dicts; only the ``"name"`` key is read.
    """

    def _is_platform_build(filename: str, platform: str, cfg: Dict) -> bool:
        # 1) Dedicated installer extension (.exe, .dmg, .deb, etc.)
        if any(filename.endswith(suffix) for suffix in cfg["installer_extensions"]):
            return True
        # 2) Generic archive (.zip, .tar.gz) carrying a platform keyword in its name
        if not any(filename.endswith(suffix) for suffix in _ARCHIVE_EXTENSIONS):
            return False
        tokens = PLATFORM_ASSET_KEYWORDS.get(platform, [])
        return any(token in filename for token in tokens)

    for platform, cfg in PLATFORMS.items():
        if info.has_installers.get(platform):
            continue
        # any() stops at the first matching asset, like the original inner-loop break.
        found = any(
            _is_platform_build(asset.get("name", "").lower(), platform, cfg)
            for asset in assets
        )
        if found:
            info.has_installers[platform] = True
384+
385+
# Try /releases/latest first (one API call)
386+
data, err = await self.get(f"https://api.github.com/repos/{full_name}/releases/latest")
387+
if data and not data.get("draft") and not data.get("prerelease"):
388+
info.has_release = True
389+
info.published_at = data.get("published_at")
390+
_check_assets(data.get("assets", []))
367391
self.release_cache[full_name] = info
368392
return info
369393

@@ -377,13 +401,7 @@ async def get_latest_stable_release(self, owner: str, repo: str) -> ReleaseInfo:
377401
if not release.get("draft") and not release.get("prerelease"):
378402
info.has_release = True
379403
info.published_at = release.get("published_at")
380-
assets = release.get("assets", [])
381-
for platform, cfg in PLATFORMS.items():
382-
for asset in assets:
383-
name = asset.get("name", "").lower()
384-
if any(name.endswith(ext) for ext in cfg["installer_extensions"]):
385-
info.has_installers[platform] = True
386-
break
404+
_check_assets(release.get("assets", []))
387405
break
388406

389407
self.release_cache[full_name] = info
@@ -898,15 +916,19 @@ async def process_platform(platform: str, budget: int):
898916
return
899917
repos = await fetch_fn(client, platform, budget)
900918

901-
# Never overwrite good cached data with empty results
919+
# Never save 0 repos — preserve whatever exists in cache
920+
if len(repos) == 0:
921+
existing = _load_existing_count(category, platform)
922+
print(f" ⚠ 0 repos fetched — skipping save (existing cache: {existing} repos)")
923+
return
924+
925+
# Don't overwrite good cached data with poor results
902926
min_threshold = 10 if category == "new-releases" else 30
903927
if len(repos) < min_threshold:
904928
existing = _load_existing_count(category, platform)
905929
if existing >= min_threshold:
906930
print(f" ⚠ Only {len(repos)} repos fetched but cache has {existing} — keeping cached data")
907931
return
908-
elif len(repos) == 0:
909-
print(f" ⚠ 0 repos fetched and no good cache — saving empty result")
910932

911933
save_data(category, platform, repos, timestamp)
912934

0 commit comments

Comments
 (0)