Skip to content

Commit 0bc0b44

Browse files
committed
feat: doc type in index.md, remove list_files from query agent, source backlink for short docs
- index.md entries now show (short) or (pageindex) type marker
- Query agent prompt updated: guides agent to read sources for detail
- Removed list_files tool from query agent (index.md is sufficient)
- Short doc summaries now have source_doc frontmatter linking to sources/
- Reverted list_wiki_files to only list .md files
- Fixed tests for model name change and agent tool count
1 parent 27a9e3a commit 0bc0b44

5 files changed

Lines changed: 38 additions & 32 deletions

File tree

openkb/agent/compiler.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -289,13 +289,20 @@ def _find_source_filename(doc_name: str, kb_dir: Path) -> str:
289289
return f"{doc_name}.pdf"
290290

291291

292-
def _write_summary(wiki_dir: Path, doc_name: str, source_file: str, summary: str, brief: str = "") -> None:
293-
"""Write summary page with frontmatter."""
292+
def _write_summary(wiki_dir: Path, doc_name: str, source_file: str, summary: str,
293+
brief: str = "", doc_type: str = "short") -> None:
294+
"""Write summary page with frontmatter.
295+
296+
For short docs, includes a ``source_doc`` field linking to the full
297+
source text in ``sources/{doc_name}.md``.
298+
"""
294299
summaries_dir = wiki_dir / "summaries"
295300
summaries_dir.mkdir(parents=True, exist_ok=True)
296301
fm_lines = [f"sources: [{source_file}]"]
297302
if brief:
298303
fm_lines.append(f"brief: {brief}")
304+
if doc_type == "short":
305+
fm_lines.append(f"source_doc: sources/{doc_name}.md")
299306
frontmatter = "---\n" + "\n".join(fm_lines) + "\n---\n\n"
300307
(summaries_dir / f"{doc_name}.md").write_text(frontmatter + summary, encoding="utf-8")
301308

@@ -442,12 +449,15 @@ def _backlink_concepts(wiki_dir: Path, doc_name: str, concept_slugs: list[str])
442449
def _update_index(
443450
wiki_dir: Path, doc_name: str, concept_names: list[str],
444451
doc_brief: str = "", concept_briefs: dict[str, str] | None = None,
452+
doc_type: str = "short",
445453
) -> None:
446454
"""Append document and concept entries to index.md.
447455
448456
When ``doc_brief`` or entries in ``concept_briefs`` are provided, entries
449-
are written as ``- [[link]] — brief text``. Existing entries are detected
450-
by the link part only, so updating a brief on a re-compile works correctly.
457+
are written as ``- [[link]] (type) — brief text``. Existing entries are
458+
detected by the link part only, so updating a brief on a re-compile works.
459+
``doc_type`` is ``"short"`` or ``"pageindex"`` — shown in the entry so the
460+
query agent knows how to access detailed content.
451461
"""
452462
if concept_briefs is None:
453463
concept_briefs = {}
@@ -463,7 +473,7 @@ def _update_index(
463473

464474
doc_link = f"[[summaries/{doc_name}]]"
465475
if doc_link not in text:
466-
doc_entry = f"- {doc_link}"
476+
doc_entry = f"- {doc_link} ({doc_type})"
467477
if doc_brief:
468478
doc_entry += f" — {doc_brief}"
469479
if "## Documents" in text:
@@ -498,6 +508,7 @@ async def _compile_concepts(
498508
doc_name: str,
499509
max_concurrency: int,
500510
doc_brief: str = "",
511+
doc_type: str = "short",
501512
) -> None:
502513
"""Shared Steps 2-4: concepts plan → generate/update → index.
503514
@@ -635,7 +646,8 @@ async def _gen_update(concept: dict) -> tuple[str, str, bool, str]:
635646

636647
# --- Step 4: Update index (code only) ---
637648
_update_index(wiki_dir, doc_name, concept_names,
638-
doc_brief=doc_brief, concept_briefs=concept_briefs_map)
649+
doc_brief=doc_brief, concept_briefs=concept_briefs_map,
650+
doc_type=doc_type)
639651

640652

641653
async def compile_short_doc(
@@ -684,6 +696,7 @@ async def compile_short_doc(
684696
await _compile_concepts(
685697
wiki_dir, kb_dir, model, system_msg, doc_msg,
686698
summary, doc_name, max_concurrency, doc_brief=doc_brief,
699+
doc_type="short",
687700
)
688701

689702

@@ -726,4 +739,5 @@ async def compile_long_doc(
726739
await _compile_concepts(
727740
wiki_dir, kb_dir, model, system_msg, doc_msg,
728741
overview, doc_name, max_concurrency, doc_brief=doc_description,
742+
doc_type="pageindex",
729743
)

openkb/agent/query.py

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,27 +5,27 @@
55

66
from agents import Agent, Runner, function_tool
77

8-
from openkb.agent.tools import list_wiki_files, read_wiki_file
9-
from openkb.schema import SCHEMA_MD, get_agents_md
8+
from openkb.agent.tools import read_wiki_file
9+
from openkb.schema import get_agents_md
1010

1111
_QUERY_INSTRUCTIONS_TEMPLATE = """\
1212
You are a knowledge-base Q&A agent. You answer questions by searching the wiki.
1313
1414
{schema_md}
1515
1616
## Search strategy
17-
1. Read index.md to understand what documents and concepts are available.
18-
Each entry has a brief summary to help you judge relevance.
17+
1. Read index.md to see all documents and concepts with brief summaries.
18+
Each document is marked (short) or (pageindex) to indicate its type.
1919
2. Read relevant summary pages (summaries/) for document overviews.
2020
3. Read concept pages (concepts/) for cross-document synthesis.
21-
4. For long documents, use get_page_content(doc_name, pages) to read
22-
specific pages when you need detailed content. The summary page
23-
shows chapter structure with page ranges to help you decide which
24-
pages to read.
25-
5. Synthesise a clear, well-cited answer.
26-
27-
Always ground your answer in the wiki content. If you cannot find relevant
28-
information, say so clearly.
21+
4. When you need detailed source content:
22+
- Short documents: read_file("sources/{{doc_name}}.md") for the full text.
23+
- PageIndex documents: use get_page_content(doc_name, pages) to read
24+
specific pages. The summary page shows chapter structure with page
25+
ranges to help you decide which pages to read.
26+
5. Synthesise a clear, well-cited answer grounded in wiki content.
27+
28+
If you cannot find relevant information, say so clearly.
2929
"""
3030

3131

@@ -35,14 +35,6 @@ def build_query_agent(wiki_root: str, model: str, language: str = "en") -> Agent
3535
instructions = _QUERY_INSTRUCTIONS_TEMPLATE.format(schema_md=schema_md)
3636
instructions += f"\n\nIMPORTANT: Write all wiki content in {language} language."
3737

38-
@function_tool
39-
def list_files(directory: str) -> str:
40-
"""List all Markdown files in a wiki subdirectory.
41-
Args:
42-
directory: Subdirectory path relative to wiki root (e.g. 'sources').
43-
"""
44-
return list_wiki_files(directory, wiki_root)
45-
4638
@function_tool
4739
def read_file(path: str) -> str:
4840
"""Read a Markdown file from the wiki.
@@ -68,7 +60,7 @@ def get_page_content_tool(doc_name: str, pages: str) -> str:
6860
return Agent(
6961
name="wiki-query",
7062
instructions=instructions,
71-
tools=[list_files, read_file, get_page_content_tool],
63+
tools=[read_file, get_page_content_tool],
7264
model=f"litellm/{model}",
7365
model_settings=ModelSettings(parallel_tool_calls=False),
7466
)

tests/test_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def test_appends_entries_with_briefs(self, tmp_path):
160160
doc_brief="Introduces transformers",
161161
concept_briefs={"attention": "Focus mechanism", "transformer": "NN architecture"})
162162
text = (wiki / "index.md").read_text()
163-
assert "[[summaries/my-doc]] — Introduces transformers" in text
163+
assert "[[summaries/my-doc]] (short) — Introduces transformers" in text
164164
assert "[[concepts/attention]] — Focus mechanism" in text
165165
assert "[[concepts/transformer]] — NN architecture" in text
166166

tests/test_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def test_default_config_keys():
1010

1111

1212
def test_default_config_values():
13-
assert DEFAULT_CONFIG["model"] == "gpt-5.4"
13+
assert DEFAULT_CONFIG["model"] == "gpt-5.4-mini"
1414
assert DEFAULT_CONFIG["language"] == "en"
1515
assert DEFAULT_CONFIG["pageindex_threshold"] == 20
1616

tests/test_query.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,16 @@ def test_agent_name(self, tmp_path):
1515
agent = build_query_agent(str(tmp_path), "gpt-4o-mini")
1616
assert agent.name == "wiki-query"
1717

18-
def test_agent_has_three_tools(self, tmp_path):
18+
def test_agent_has_two_tools(self, tmp_path):
1919
agent = build_query_agent(str(tmp_path), "gpt-4o-mini")
20-
assert len(agent.tools) == 3
20+
assert len(agent.tools) == 2
2121

2222
def test_agent_tool_names(self, tmp_path):
2323
agent = build_query_agent(str(tmp_path), "gpt-4o-mini")
2424
names = {t.name for t in agent.tools}
25-
assert "list_files" in names
2625
assert "read_file" in names
2726
assert "get_page_content_tool" in names
27+
assert "list_files" not in names
2828
assert "pageindex_retrieve" not in names
2929

3030
def test_instructions_mention_get_page_content(self, tmp_path):

0 commit comments

Comments
 (0)