Skip to content

Commit 2ba4d6a

Browse files
committed
feat: Enhance RLM agent with multi-turn state for repo tools.
1 parent ed4985f commit 2ba4d6a

9 files changed

Lines changed: 1361 additions & 15 deletions

npx/python/cli/repo_tools.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,10 +247,12 @@ async def list_directory(self, path: str = "") -> list[dict[str, Any]]:
247247
# Directory listing
248248
entries = []
249249
for item in data:
250+
# Use 0 instead of None for directory size to avoid JSON null -> Pyodide issues
251+
size = item.get("size") if item.get("type") == "file" else 0
250252
entries.append({
251253
"path": item.get("path", ""),
252254
"type": item.get("type", "file"),
253-
"size": item.get("size") if item.get("type") == "file" else None,
255+
"size": size,
254256
})
255257
return entries
256258

npx/python/cli/virtual_runner.py

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -30,32 +30,53 @@
3030
3131
### STEP 1: Find file paths with SEARCH_CODE
3232
```python
33-
print("SEARCH_CODE:rlm.py") # Search for files by name
34-
print("SEARCH_CODE:enable_tool_optimization") # Search for code patterns
33+
# CORRECT - no quotes around the search term after the colon
34+
print("SEARCH_CODE:rlm.py")
35+
print("SEARCH_CODE:enable_tool_optimization")
36+
print("SEARCH_CODE:llm_query_batched")
37+
38+
# WRONG - do NOT add extra quotes or parentheses
39+
# print("SEARCH_CODE:rlm.py")") # ❌ Extra quote/paren
40+
# print("SEARCH_CODE:'rlm.py'") # ❌ Quoted search term
3541
```
3642
Results appear in `search_results` on your NEXT step.
3743
3844
### STEP 2: Fetch files with FETCH_FILE
3945
```python
40-
print("FETCH_FILE:dspy/predict/rlm.py") # Exact path required
46+
# CORRECT - exact path, no extra quotes
47+
print("FETCH_FILE:dspy/predict/rlm.py")
48+
print("FETCH_FILE:tests/predict/test_rlm.py")
49+
50+
# WRONG - do NOT add extra quotes or parentheses
51+
# print("FETCH_FILE:dspy/predict/rlm.py")") # ❌ Extra quote/paren
4152
```
4253
Content appears in `repo_files['dspy/predict/rlm.py']` on your NEXT step.
4354
4455
### STEP 3: List directories with LIST_DIR
4556
```python
46-
print("LIST_DIR:dspy/predict/")
57+
# CORRECT - use LIST_DIR (NOT LIST_FILES!)
58+
print("LIST_DIR:dspy/predict")
59+
print("LIST_DIR:dspy/predict/") # trailing slash is OK
60+
print("LIST_DIR:tests")
61+
62+
# WRONG - do NOT use these non-existent commands
63+
# print("LIST_FILES:dspy/predict") # ❌ No such command as LIST_FILES
64+
# print("LIST_DIRECTORY:dspy/predict") # ❌ Wrong name
65+
# print("LS:dspy/predict") # ❌ Not supported
4766
```
48-
Entries appear in `repo_dirs['dspy/predict/']` on your NEXT step.
67+
Entries appear in `repo_dirs['dspy/predict']` on your NEXT step as a list of dicts with 'path', 'type', and 'size'.
4968
5069
### IMPORTANT NOTES:
5170
- Commands print EXACTLY as shown, the system intercepts them
71+
- The command is `LIST_DIR` not `LIST_FILES` or `LIST_DIRECTORY`
5272
- Do NOT try os.walk() or open() - those won't work
5373
- Data becomes available on your NEXT step
5474
- Check `repo_files`, `repo_dirs`, `search_results` dicts
75+
- SYNTAX: Make sure print statements are properly closed with exactly ONE `")`
5576
5677
### WORKFLOW EXAMPLE (finding if rlm.py contains X):
57-
Step 1: `print("SEARCH_CODE:rlm.py")` → find the path
58-
Step 2: Check `search_results` for path, then `print("FETCH_FILE:dspy/predict/rlm.py")`
78+
Step 1: `print("LIST_DIR:dspy/predict")` → get directory contents
79+
Step 2: Check `repo_dirs['dspy/predict']` for file list, then `print("FETCH_FILE:dspy/predict/rlm.py")`
5980
Step 3: Check `repo_files['dspy/predict/rlm.py']` for content
6081
"""
6182

@@ -249,18 +270,21 @@ async def _run_rlm_with_tools(self, context: str, question: str) -> tuple[str, l
249270
# Prepend tool instructions to question (treated as instructions, not data)
250271
augmented_question = AGENTIC_TOOLS_PROMPT + "\n\n---\n\n**USER QUESTION:** " + question
251272

252-
input_args = {
253-
"context": context,
254-
"question": augmented_question,
255-
}
256-
257-
variables = rlm._build_variables(**input_args)
258-
259273
with dspy.context(lm=self._lm):
260274
with rlm._interpreter_context(execution_tools) as repl:
261275
history = REPLHistory()
262276

263277
for iteration in range(rlm.max_iterations):
278+
# Rebuild variables with current tool state so LLM sees available data
279+
input_args = {
280+
"context": context,
281+
"question": augmented_question,
282+
"repo_files": self._repo_files,
283+
"repo_dirs": self._repo_dirs,
284+
"search_results": self._search_results,
285+
}
286+
variables = rlm._build_variables(**input_args)
287+
264288
variables_info = [variable.format() for variable in variables]
265289
pred = await rlm.generate_action.acall(
266290
variables_info=variables_info,
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
"""Minimal reproduction of FETCH_FILE issue.
2+
3+
This script tests the fix for the variable rebuild issue in VirtualReviewRunner.
4+
Before fix: repo_files appears empty in LLM prompt even after files are fetched.
5+
After fix: repo_files is properly populated in each iteration's prompt.
6+
"""
7+
8+
import asyncio
9+
import os
10+
from cli.virtual_runner import VirtualReviewRunner
11+
12+
13+
async def repro():
    """Run the FETCH_FILE reproduction with per-iteration debug output.

    Wraps the runner's ``generate_action.acall`` so that every call prints the
    variables handed to the LLM and records whether ``repo_files`` appears in
    them, then runs one review and prints a verdict based on the recorded
    iterations.

    The ``GEMINI_API_KEY`` environment variable must be set; otherwise an
    error is printed and the function returns immediately. All results are
    printed; nothing is returned.
    """
    # Local imports: this script's module header only imports asyncio and os.
    import re
    import traceback

    # Set API key
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        print("ERROR: GEMINI_API_KEY not set")
        return

    print("=" * 80)
    print("FETCH_FILE REPRODUCTION TEST")
    print("=" * 80)

    runner = VirtualReviewRunner(model="gemini-3-flash-preview", quiet=False)
    runner._ensure_configured()

    # Compiled once instead of on every intercepted call. Captures the first
    # brace-delimited span that follows the text "repo_files".
    repo_files_pattern = re.compile(r"repo_files.*?(\{[^}]*\})", re.DOTALL)

    # Intercept acall to debug variable state
    original_acall = runner._rlm.generate_action.acall
    iterations_data = []

    async def intercepted_acall(*args, **kwargs):
        """Print and record the prompt variables, then delegate to the real acall."""
        iteration = kwargs.get('iteration', 'unknown')
        variables_info = kwargs.get('variables_info', [])

        print(f"\n{'='*80}")
        print(f"[DEBUG] Iteration {iteration}")
        print(f"{'='*80}")

        # Check what variables are being passed to the LLM
        for i, v in enumerate(variables_info):
            print(f"\nVariable {i} (length: {len(v)}):")
            # Slicing already returns the whole string when it is short.
            print(v[:300])
            if len(v) > 300:
                print("... (truncated)")

        # Check if repo_files is mentioned in the prompt
        variables_str = "\n".join(variables_info)
        has_repo_files = "repo_files" in variables_str

        # Rough population signal only: counts ':' characters inside the first
        # {...} span after "repo_files". It is 0 for an empty dict and > 0 once
        # entries appear, but it is not an exact number of files.
        files_count = 0
        files_match = repo_files_pattern.search(variables_str)
        if files_match:
            files_count = files_match.group(1).count(":")

        print(f"\n[DEBUG] repo_files in prompt? {has_repo_files}")
        print(f"[DEBUG] Number of files in repo_files: {files_count}")

        iterations_data.append({
            'iteration': iteration,
            'has_repo_files': has_repo_files,
            'files_count': files_count
        })

        return await original_acall(*args, **kwargs)

    runner._rlm.generate_action.acall = intercepted_acall

    # Test URL and question
    url = "https://github.com/stanfordnlp/dspy/pull/9240"
    question = "What is in dspy/predict/rlm.py? Please fetch and analyze the complete contents of this file."

    print(f"\nTesting URL: {url}")
    print(f"Question: {question}\n")

    try:
        answer, sources, metadata = await runner.review(url, question)

        print(f"\n{'='*80}")
        print("FINAL RESULT")
        print(f"{'='*80}")
        print(f"Answer preview: {answer[:500]}...")
        print(f"\nFiles fetched: {metadata.get('files_fetched', [])}")
        print(f"Model: {metadata.get('model')}")

        # Analyze iterations
        print(f"\n{'='*80}")
        print("ITERATION ANALYSIS")
        print(f"{'='*80}")
        for data in iterations_data:
            print(f"Iteration {data['iteration']}: "
                  f"repo_files={'✓' if data['has_repo_files'] else '✗'}, "
                  f"files={data['files_count']}")

        # Determine if fix worked
        print(f"\n{'='*80}")
        print("VERDICT")
        print(f"{'='*80}")

        if len(iterations_data) > 1:
            # After first iteration, repo_files should be populated if files were fetched
            later_iterations = iterations_data[1:]
            has_populated = any(d['files_count'] > 0 for d in later_iterations)

            if has_populated:
                print("✓ SUCCESS: repo_files was populated in later iterations")
                print(" The fix is working correctly!")
            else:
                print("✗ FAILURE: repo_files never got populated in later iterations")
                print(" The bug still exists!")
        else:
            print("? UNCLEAR: Only one iteration ran, cannot determine if fix works")

    except Exception as e:
        # Top-level boundary of a debug script: report the failure in full
        # rather than letting the run die without the banner.
        print(f"\n{'='*80}")
        print("ERROR DURING EXECUTION")
        print(f"{'='*80}")
        print(f"Error: {e}")
        traceback.print_exc()
    finally:
        # Undo the monkeypatch so the runner is not left wrapped after the
        # reproduction finishes or fails.
        runner._rlm.generate_action.acall = original_acall
126+
127+
128+
if __name__ == "__main__":
    # Script entry point: build the reproduction coroutine and drive it to
    # completion on a fresh event loop.
    reproduction = repro()
    asyncio.run(reproduction)
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
"""Direct test of LIST_DIR against kmad/dspy repository.
2+
3+
Tests LIST_DIR command directly against GitHub API to verify:
4+
- Directory listing works correctly
5+
- Returns subdirectories (avatar/)
6+
- Returns files (rlm.py, etc.)
7+
- Expected count: 1 subdir + 15 files
8+
9+
Based on https://github.com/kmad/dspy/tree/main/dspy/predict
10+
"""
11+
12+
import asyncio
13+
import os
14+
import sys
15+
from pathlib import Path
16+
17+
# Add parent directory to path
18+
sys.path.insert(0, str(Path(__file__).parent.parent))
19+
20+
from cli.repo_tools import RepoTools
21+
22+
23+
async def test_listdir_direct() -> bool:
    """Test LIST_DIR directly via RepoTools.

    Lists ``dspy/predict`` of ``kmad/dspy`` at ref ``main``, prints the
    entries found, and checks that an ``avatar`` subdirectory and an
    ``rlm.py`` file are present.

    Returns:
        True when all required checks pass; False when the listing is empty
        or a required entry is missing. Finding fewer than 10 files only
        prints a warning and does not fail the run.
    """
    print("=" * 80)
    print("LIST_DIR DIRECT TEST - kmad/dspy/dspy/predict")
    print("=" * 80)

    owner = "kmad"
    repo = "dspy"
    path = "dspy/predict"
    ref = "main"

    def basename(entry):
        # Last path component, so checks compare names exactly instead of
        # matching substrings anywhere in the path.
        return entry['path'].split('/')[-1]

    tools = RepoTools(owner, repo, ref)

    try:
        print(f"\nListing directory: {owner}/{repo}/{path} @ {ref}")
        entries = await tools.list_directory(path)

        if not entries:
            print("❌ FAILED: No entries returned")
            return False

        # Separate dirs and files
        dirs = [e for e in entries if e['type'] == 'dir']
        files = [e for e in entries if e['type'] == 'file']
        dir_names = [basename(d) for d in dirs]
        file_names = [basename(f) for f in files]

        print(f"\n✓ SUCCESS: Found {len(entries)} total entries")
        print(f" - {len(dirs)} subdirectories")
        print(f" - {len(files)} files")

        # List subdirectories
        print("\nSubdirectories:")
        for name in dir_names:
            print(f" 📁 {name}")

        # List files (first 20)
        print("\nFiles (showing first 20):")
        for name in file_names[:20]:
            print(f" 📄 {name}")

        # Verify expectations from screenshot
        print("\n" + "=" * 80)
        print("VERIFICATION")
        print("=" * 80)

        success = True

        # Check for avatar subdirectory (exact name, so e.g. "avatar_old"
        # does not satisfy the check)
        if 'avatar' in dir_names:
            print("✓ Found 'avatar' subdirectory")
        else:
            print("❌ 'avatar' subdirectory NOT found")
            success = False

        # Check for rlm.py file (exact name, so e.g. "test_rlm.py" does not
        # satisfy the check)
        if 'rlm.py' in file_names:
            print("✓ Found 'rlm.py' file")
        else:
            print("❌ 'rlm.py' file NOT found")
            success = False

        # Check counts (approximate - may change)
        if len(dirs) >= 1:
            print(f"✓ Has at least 1 subdirectory (found {len(dirs)})")
        else:
            print(f"❌ Expected at least 1 subdirectory, found {len(dirs)}")
            success = False

        if len(files) >= 10:
            print(f"✓ Has at least 10 files (found {len(files)})")
        else:
            # Deliberately a warning only: the file count may drift upstream.
            print(f"⚠️ Expected at least 10 files, found {len(files)}")

        return success

    finally:
        # Always release the RepoTools client, including on early return.
        await tools.close()
103+
104+
105+
if __name__ == "__main__":
    # Use sys.exit rather than the bare `exit` helper: `exit` is injected by
    # the `site` module for interactive sessions and is not guaranteed to
    # exist (e.g. under `python -S`), whereas sys.exit always sets the
    # process exit status.
    success = asyncio.run(test_listdir_direct())
    sys.exit(0 if success else 1)

0 commit comments

Comments
 (0)