Skip to content

Commit 0fdba17

Browse files
committed
This change should produce significantly more relevant results (repos that actually match both the category AND the platform) while using fewer API calls.
1 parent 6e7cfed commit 0fdba17

1 file changed

Lines changed: 32 additions & 19 deletions

File tree

scripts/fetch_all_categories.py

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -920,40 +920,53 @@ async def fetch_topic(
920920

921921
specs = []
922922

923-
# 1) Topic-based searches: cross topic categories with platform topics
924-
# e.g. "topic:privacy topic:android" — most relevant results
925-
for topic in topics[:8]: # top 8 topics to avoid too many queries
923+
# Strategy: fewer, broader queries to stay within search rate limits.
924+
# Each query uses OR to combine multiple category topics, reducing total
925+
# search API calls from ~78 to at most 24 per topic×platform.
926+
927+
# 1) Batch category topics into groups of 4, cross with platform topics
928+
# e.g. "(topic:terminal OR topic:developer-tools OR topic:cli OR topic:ide)"
929+
# AND "(topic:android OR topic:android-app)"
930+
# Using AND between groups (implicit) so results must match BOTH category AND platform.
931+
# Note: search_repos() prepends "fork:true" automatically.
932+
topic_batches = [topics[i:i+4] for i in range(0, min(len(topics), 12), 4)]
933+
for batch in topic_batches:
926934
base = f"stars:>10 archived:false pushed:>={one_year}"
935+
cat_or = " OR ".join(f"topic:{t}" for t in batch)
936+
plat_or = " OR ".join(f"topic:{t}" for t in platform_topics[:2])
927937
specs.append({
928-
"query": _build_query(base, topics=[topic] + platform_topics[:2]),
938+
"query": f"{base} ({cat_or}) ({plat_or})",
929939
"sort": "stars", "pages": 3, "weight": 1.5,
930940
})
931941

932-
# 2) Keyword in description/name + platform topic
942+
# 2) Keywords in name/description + platform topic (top 3 keywords)
933943
for kw in keywords[:3]:
934944
base = f"stars:>20 archived:false pushed:>={one_year}"
935945
specs.append({
936946
"query": _build_query(base, topics=platform_topics[:2], description_kw=kw),
937947
"sort": "stars", "pages": 3, "weight": 1.2,
938948
})
939949

940-
# 3) Topic + primary language (catches repos without platform topic)
941-
for topic in topics[:5]:
942-
for lang in all_langs[:2]:
943-
base = f"stars:>20 archived:false pushed:>={one_year}"
944-
specs.append({
945-
"query": _build_query(base, topics=[topic], language=lang),
946-
"sort": "stars", "pages": 3, "weight": 1.0,
947-
})
948-
949-
# 4) Broader: topic only, high stars (platform-agnostic gems)
950-
for topic in topics[:5]:
951-
base = f"stars:>500 archived:false pushed:>={two_years}"
950+
# 3) All category topics combined + primary language (catches repos without platform topic)
951+
# Single query per language instead of per-batch to reduce total API calls.
952+
cat_lang_or = " OR ".join(f"topic:{t}" for t in topics[:8])
953+
for lang in all_langs[:2]:
954+
base = f"stars:>20 archived:false pushed:>={one_year}"
952955
specs.append({
953-
"query": _build_query(base, topics=[topic]),
954-
"sort": "stars", "pages": 3, "weight": 0.8,
956+
"query": f"{base} ({cat_lang_or}) language:{lang}",
957+
"sort": "stars", "pages": 2, "weight": 1.0,
955958
})
956959

960+
# 4) Broader: all category topics combined, high stars (platform-agnostic)
961+
cat_all_or = " OR ".join(f"topic:{t}" for t in topics[:8])
962+
base = f"stars:>500 archived:false pushed:>={two_years}"
963+
specs.append({
964+
"query": f"{base} ({cat_all_or})",
965+
"sort": "stars", "pages": 2, "weight": 0.8,
966+
})
967+
968+
print(f" {len(specs)} search specs ({sum(s.get('pages', 3) for s in specs)} API calls)")
969+
957970
candidates = await _collect_candidates(
958971
client, specs, platform, seen, compute_velocity=False, min_score=0,
959972
)

0 commit comments

Comments
 (0)