1212import asyncio
1313import json
1414import logging
15+ import re
1516import sys
17+ import threading
1618import time
1719from pathlib import Path
1820
9597# LLM helpers
9698# ---------------------------------------------------------------------------
9799
98- import threading
99-
100-
101100class _Spinner :
102101 """Animated dots spinner that runs in a background thread."""
103102
@@ -208,6 +207,37 @@ def _read_wiki_context(wiki_dir: Path) -> tuple[str, list[str]]:
208207 return index_content , existing
209208
210209
def _read_concept_briefs(wiki_dir: Path, *, max_chars: int = 150) -> str:
    """Read existing concept pages and return compact one-line summaries.

    For each ``*.md`` file in ``wiki_dir / "concepts"`` (sorted by path), the
    leading YAML frontmatter is skipped, every run of whitespace in the body is
    collapsed to a single space, and the first ``max_chars`` characters are
    kept. Each page is rendered as ``- {slug}: {brief}`` where ``slug`` is the
    file stem.

    Args:
        wiki_dir: Root directory of the wiki; concept pages live in its
            ``concepts`` subdirectory.
        max_chars: Maximum length of each brief (defaults to 150).

    Returns:
        One summary line per concept joined with newlines, or the literal
        string "(none yet)" if the concepts directory is missing or holds no
        Markdown files.
    """
    concepts_dir = wiki_dir / "concepts"
    if not concepts_dir.is_dir():
        return "(none yet)"

    # Ignore directories that merely happen to be named "*.md"; read_text()
    # would raise on them.
    md_files = sorted(p for p in concepts_dir.glob("*.md") if p.is_file())
    if not md_files:
        return "(none yet)"

    lines: list[str] = []
    for path in md_files:
        text = path.read_text(encoding="utf-8")
        # Strip YAML frontmatter if present. The closing delimiter must start
        # a line: a bare find("---") would also match a "---" embedded in a
        # frontmatter value (for example a source filename) and leak the rest
        # of the frontmatter into the brief.
        if text.startswith("---"):
            end = text.find("\n---", 3)
            if end != -1:
                text = text[end + len("\n---"):]
        # split()/join collapses newlines, tabs and repeated blanks alike, so
        # blank lines do not waste the character budget on padding.
        brief = " ".join(text.split())[:max_chars]
        lines.append(f"- {path.stem}: {brief}")

    return "\n".join(lines)
239+
240+
211241def _find_source_filename (doc_name : str , kb_dir : Path ) -> str :
212242 """Find the original filename in raw/ for a given doc stem."""
213243 raw_dir = kb_dir / "raw"
@@ -226,11 +256,24 @@ def _write_summary(wiki_dir: Path, doc_name: str, source_file: str, summary: str
226256 (summaries_dir / f"{ doc_name } .md" ).write_text (frontmatter + summary , encoding = "utf-8" )
227257
228258
# Matches every character that is NOT an ASCII letter, digit, underscore or
# hyphen; each such character is replaced when building a filename.
_SAFE_NAME_RE = re.compile(r'[^a-zA-Z0-9_\-]')


def _sanitize_concept_name(name: str) -> str:
    """Return a filename-safe slug derived from a concept name.

    Each character outside ``[a-zA-Z0-9_-]`` is replaced with a hyphen, then
    leading and trailing hyphens are trimmed. If nothing remains, the
    placeholder "unnamed-concept" is returned instead of an empty string.
    """
    slug = _SAFE_NAME_RE.sub("-", name)
    slug = slug.strip("-")
    if not slug:
        return "unnamed-concept"
    return slug
266+
267+
229268def _write_concept (wiki_dir : Path , name : str , content : str , source_file : str , is_update : bool ) -> None :
230269 """Write or update a concept page, managing the sources frontmatter."""
231270 concepts_dir = wiki_dir / "concepts"
232271 concepts_dir .mkdir (parents = True , exist_ok = True )
233- path = concepts_dir / f"{ name } .md"
272+ safe_name = _sanitize_concept_name (name )
273+ path = (concepts_dir / f"{ safe_name } .md" ).resolve ()
274+ if not path .is_relative_to (concepts_dir .resolve ()):
275+ logger .warning ("Concept name escapes concepts dir: %s" , name )
276+ return
234277
235278 if is_update and path .exists ():
236279 existing = path .read_text (encoding = "utf-8" )
@@ -241,7 +284,11 @@ def _write_concept(wiki_dir: Path, name: str, content: str, source_file: str, is
241284 body = existing [end + 3 :]
242285 if "sources:" in fm :
243286 fm = fm .replace ("sources: [" , f"sources: [{ source_file } , " )
287+ else :
288+ fm = fm .replace ("---\n " , f"---\n sources: [{ source_file } ]\n " , 1 )
244289 existing = fm + body
290+ else :
291+ existing = f"---\n sources: [{ source_file } ]\n ---\n \n " + existing
245292 existing += f"\n \n { content } "
246293 path .write_text (existing , encoding = "utf-8" )
247294 else :
0 commit comments