@@ -212,7 +212,47 @@ static int create_user_indexes(cbm_store_t *s) {
212212 "CREATE INDEX IF NOT EXISTS idx_edges_type ON edges(project, type);"
213213 "CREATE INDEX IF NOT EXISTS idx_edges_target_type ON edges(project, target_id, type);"
214214 "CREATE INDEX IF NOT EXISTS idx_edges_source_type ON edges(project, source_id, type);" ;
215- return exec_sql (s , sql );
215+ int rc = exec_sql (s , sql );
216+ if (rc != SQLITE_OK ) return rc ;
217+
218+ /* FTS5 full-text search index on node names for BM25 ranking.
219+ * content='nodes' makes it an external-content table — synced via triggers.
220+ * Each DDL statement must be executed separately for FTS5 compatibility. */
221+ {
222+ char * fts_err = NULL ;
223+ int fts_rc = sqlite3_exec (s -> db ,
224+ "CREATE VIRTUAL TABLE IF NOT EXISTS nodes_fts USING fts5("
225+ "name, qualified_name, label, file_path,"
226+ "content='nodes', content_rowid='id',"
227+ "tokenize='unicode61 remove_diacritics 2'"
228+ ");" ,
229+ NULL , NULL , & fts_err );
230+ if (fts_rc != SQLITE_OK ) {
231+ sqlite3_free (fts_err );
232+ /* Non-fatal — FTS5 may not be compiled in. Fall back to regex search. */
233+ return SQLITE_OK ;
234+ }
235+ }
236+
237+ /* Sync triggers: keep FTS index up to date when nodes change */
238+ exec_sql (s , "CREATE TRIGGER IF NOT EXISTS nodes_fts_ai AFTER INSERT ON nodes BEGIN"
239+ " INSERT INTO nodes_fts(rowid, name, qualified_name, label, file_path)"
240+ " VALUES (new.id, new.name, new.qualified_name, new.label, new.file_path);"
241+ "END;" );
242+
243+ exec_sql (s , "CREATE TRIGGER IF NOT EXISTS nodes_fts_ad AFTER DELETE ON nodes BEGIN"
244+ " INSERT INTO nodes_fts(nodes_fts, rowid, name, qualified_name, label, file_path)"
245+ " VALUES ('delete', old.id, old.name, old.qualified_name, old.label, old.file_path);"
246+ "END;" );
247+
248+ exec_sql (s , "CREATE TRIGGER IF NOT EXISTS nodes_fts_au AFTER UPDATE ON nodes BEGIN"
249+ " INSERT INTO nodes_fts(nodes_fts, rowid, name, qualified_name, label, file_path)"
250+ " VALUES ('delete', old.id, old.name, old.qualified_name, old.label, old.file_path);"
251+ " INSERT INTO nodes_fts(rowid, name, qualified_name, label, file_path)"
252+ " VALUES (new.id, new.name, new.qualified_name, new.label, new.file_path);"
253+ "END;" );
254+
255+ return SQLITE_OK ;
216256}
217257
218258static int configure_pragmas (cbm_store_t * s , bool in_memory ) {
@@ -474,6 +514,10 @@ static void finalize_stmt(sqlite3_stmt **s) {
474514 }
475515}
476516
517+ int cbm_store_exec (cbm_store_t * s , const char * sql ) {
518+ return exec_sql (s , sql );
519+ }
520+
477521void cbm_store_close (cbm_store_t * s ) {
478522 if (!s ) {
479523 return ;
@@ -1955,6 +1999,125 @@ int cbm_store_search(cbm_store_t *s, const cbm_search_params_t *params, cbm_sear
19551999 char count_sql [4096 ];
19562000 int bind_idx = 0 ;
19572001
2002+ /* ── FTS5 BM25 path: when params->query is set, use full-text search ── */
2003+ if (params -> query && params -> query [0 ]) {
2004+ /* Build FTS5 query: JOIN nodes_fts for BM25 ranking.
2005+ * Tokenize the user query into FTS5 OR terms for broader matching.
2006+ * "authentication middleware" → "authentication OR middleware" */
2007+ char fts_query [1024 ];
2008+ {
2009+ const char * q = params -> query ;
2010+ int fqlen = 0 ;
2011+ bool in_word = false;
2012+ bool first_word = true;
2013+ while (* q && fqlen < (int )sizeof (fts_query ) - 20 ) {
2014+ if ((* q >= 'a' && * q <= 'z' ) || (* q >= 'A' && * q <= 'Z' ) ||
2015+ (* q >= '0' && * q <= '9' ) || * q == '_' || * q == '-' ) {
2016+ if (!in_word && !first_word ) {
2017+ fqlen += snprintf (fts_query + fqlen , sizeof (fts_query ) - fqlen , " OR " );
2018+ }
2019+ fts_query [fqlen ++ ] = * q ;
2020+ in_word = true;
2021+ first_word = false;
2022+ } else {
2023+ if (in_word ) {
2024+ fts_query [fqlen ++ ] = ' ' ;
2025+ }
2026+ in_word = false;
2027+ }
2028+ q ++ ;
2029+ }
2030+ fts_query [fqlen ] = '\0' ;
2031+ }
2032+
2033+ char fts_sql [4096 ];
2034+ /* Join with FTS5 table, filter by project/label, order by BM25 rank */
2035+ int flen = snprintf (fts_sql , sizeof (fts_sql ),
2036+ "SELECT n.id, n.project, n.label, n.name, n.qualified_name, "
2037+ "n.file_path, n.start_line, n.end_line, n.properties, "
2038+ "(SELECT COUNT(*) FROM edges e WHERE e.target_id = n.id AND e.type = 'CALLS') AS in_deg, "
2039+ "(SELECT COUNT(*) FROM edges e WHERE e.source_id = n.id AND e.type = 'CALLS') AS out_deg, "
2040+ "bm25(nodes_fts) AS rank "
2041+ "FROM nodes_fts "
2042+ "JOIN nodes n ON n.id = nodes_fts.rowid "
2043+ "WHERE nodes_fts MATCH ?1" );
2044+
2045+ int fts_bind_idx = 1 ;
2046+ if (params -> project ) {
2047+ fts_bind_idx ++ ;
2048+ flen += snprintf (fts_sql + flen , sizeof (fts_sql ) - flen ,
2049+ " AND n.project = ?%d" , fts_bind_idx );
2050+ }
2051+ if (params -> label ) {
2052+ fts_bind_idx ++ ;
2053+ flen += snprintf (fts_sql + flen , sizeof (fts_sql ) - flen ,
2054+ " AND n.label = ?%d" , fts_bind_idx );
2055+ }
2056+
2057+ int limit = params -> limit > 0 ? params -> limit : 50 ;
2058+ flen += snprintf (fts_sql + flen , sizeof (fts_sql ) - flen ,
2059+ " ORDER BY rank LIMIT %d OFFSET %d" , limit , params -> offset );
2060+
2061+ /* Count query */
2062+ char fts_count [4096 ];
2063+ snprintf (fts_count , sizeof (fts_count ),
2064+ "SELECT COUNT(*) FROM nodes_fts "
2065+ "JOIN nodes n ON n.id = nodes_fts.rowid "
2066+ "WHERE nodes_fts MATCH ?1%s%s" ,
2067+ params -> project ? " AND n.project = ?2" : "" ,
2068+ params -> label ? (params -> project ? " AND n.label = ?3" : " AND n.label = ?2" ) : "" );
2069+
2070+ /* Execute count */
2071+ sqlite3_stmt * cnt_stmt = NULL ;
2072+ if (sqlite3_prepare_v2 (s -> db , fts_count , -1 , & cnt_stmt , NULL ) == SQLITE_OK ) {
2073+ bind_text (cnt_stmt , 1 , fts_query );
2074+ int bi = 1 ;
2075+ if (params -> project ) { bi ++ ; bind_text (cnt_stmt , bi , params -> project ); }
2076+ if (params -> label ) { bi ++ ; bind_text (cnt_stmt , bi , params -> label ); }
2077+ if (sqlite3_step (cnt_stmt ) == SQLITE_ROW ) {
2078+ out -> total = sqlite3_column_int (cnt_stmt , 0 );
2079+ }
2080+ sqlite3_finalize (cnt_stmt );
2081+ }
2082+
2083+ /* Execute main query */
2084+ sqlite3_stmt * main_stmt = NULL ;
2085+ int rc = sqlite3_prepare_v2 (s -> db , fts_sql , -1 , & main_stmt , NULL );
2086+ if (rc != SQLITE_OK ) {
2087+ /* Prepare failed — the nodes_fts table may not exist in older DBs,
2088+ * so fall back to the regex path below. */
2089+ goto regex_path ;
2090+ }
2091+ bind_text (main_stmt , 1 , fts_query );
2092+ {
2093+ int bi = 1 ;
2094+ if (params -> project ) { bi ++ ; bind_text (main_stmt , bi , params -> project ); }
2095+ if (params -> label ) { bi ++ ; bind_text (main_stmt , bi , params -> label ); }
2096+ }
2097+
2098+ int cap = 16 ;
2099+ int n = 0 ;
2100+ cbm_search_result_t * results = malloc (cap * sizeof (cbm_search_result_t ));
2101+ while (sqlite3_step (main_stmt ) == SQLITE_ROW ) {
2102+ if (n >= cap ) {
2103+ cap *= 2 ;
2104+ results = safe_realloc (results , cap * sizeof (cbm_search_result_t ));
2105+ }
2106+ memset (& results [n ], 0 , sizeof (cbm_search_result_t ));
2107+ scan_node (main_stmt , & results [n ].node );
2108+ results [n ].in_degree = sqlite3_column_int (main_stmt , 9 );
2109+ results [n ].out_degree = sqlite3_column_int (main_stmt , 10 );
2110+ n ++ ;
2111+ }
2112+ sqlite3_finalize (main_stmt );
2113+ out -> results = results ;
2114+ out -> count = n ;
2115+ return CBM_STORE_OK ;
2116+ }
2117+
2118+ regex_path :
2119+ /* ── Regex path: original regex-based search ── */
2120+
19582121 /* We build a query that selects nodes with optional degree subqueries */
19592122 const char * select_cols =
19602123 "SELECT n.id, n.project, n.label, n.name, n.qualified_name, "
0 commit comments