diff --git a/internal/cbm/extract_defs.c b/internal/cbm/extract_defs.c index 7e7fd5fd..799e92ed 100644 --- a/internal/cbm/extract_defs.c +++ b/internal/cbm/extract_defs.c @@ -458,56 +458,6 @@ static TSNode resolve_func_name_fp(TSNode node, CBMLanguage lang, const char *ki return null_node; } -// C++/CUDA: out-of-line method definitions name the function with a qualified -// declarator (`Foo::bar`, or `ns::Foo::bar`). Return the immediate enclosing -// class name (the scope segment directly left of the function name, e.g. "Foo"), -// or NULL when the declarator is unqualified (a plain free function). Without -// this, an out-of-line definition — whose class body lives declaration-only in a -// header — would be recorded as a free Function with no link to its class. -static char *cpp_out_of_line_parent_class(CBMArena *a, TSNode node, const char *source) { - // Descend the declarator chain to its qualified_identifier, if any. - TSNode qid = {0}; - TSNode decl = ts_node_child_by_field_name(node, TS_FIELD("declarator")); - for (int depth = 0; depth < DECLARATOR_DEPTH_LIMIT && !ts_node_is_null(decl); depth++) { - const char *dk = ts_node_type(decl); - if (strcmp(dk, "qualified_identifier") == 0 || strcmp(dk, "scoped_identifier") == 0) { - qid = decl; - break; - } - TSNode inner = ts_node_child_by_field_name(decl, TS_FIELD("declarator")); - if (ts_node_is_null(inner) && ts_node_named_child_count(decl) > 0) { - inner = ts_node_named_child(decl, 0); - } - if (ts_node_is_null(inner)) { - break; - } - decl = inner; - } - if (ts_node_is_null(qid)) { - return NULL; - } - // The qualified_identifier's `scope` is the parent. For a nested scope - // (`ns::Foo`) descend through its `name` field to the innermost segment so - // the direct parent ("Foo") is returned, not the outer namespace. - TSNode scope = ts_node_child_by_field_name(qid, TS_FIELD("scope")); - if (ts_node_is_null(scope)) { - return NULL; - } - for (int depth = 0; depth < DECLARATOR_DEPTH_LIMIT; depth++) { - const char *sk = ts_node_type(scope); - if (strcmp(sk, "qualified_identifier") != 0 && strcmp(sk, "scoped_identifier") != 0) { - break; - } - TSNode name = ts_node_child_by_field_name(scope, TS_FIELD("name")); - if (ts_node_is_null(name)) { - break; - } - scope = name; - } - char *text = cbm_node_text(a, scope, source); - return (text && text[0]) ? text : NULL; -} - // R: resolve function_definition name from parent binary_operator lhs. static TSNode resolve_r_func_name(TSNode node) { TSNode parent = ts_node_parent(node); @@ -2747,7 +2697,7 @@ static void extract_func_def(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec // class node QN computed the same way) so DEFINES_METHOD edges resolve. if ((ctx->language == CBM_LANG_CPP || ctx->language == CBM_LANG_CUDA) && strcmp(ts_node_type(node), "function_definition") == 0) { - char *scope_name = cpp_out_of_line_parent_class(a, node, ctx->source); + char *scope_name = cbm_cpp_out_of_line_parent_class(a, node, ctx->source); if (scope_name && scope_name[0]) { const char *class_qn = cbm_fqn_compute(a, ctx->project, ctx->rel_path, scope_name); def.qualified_name = cbm_arena_sprintf(a, "%s.%s", class_qn, name); diff --git a/internal/cbm/extract_unified.c b/internal/cbm/extract_unified.c index f65a64be..0c4ed4bc 100644 --- a/internal/cbm/extract_unified.c +++ b/internal/cbm/extract_unified.c @@ -126,6 +126,19 @@ static const char *compute_func_qn(CBMExtractCtx *ctx, TSNode node, const CBMLan return NULL; } + // C++/CUDA out-of-line method definition (`Foo::Bar` in a .cc, with or without a + // surrounding `namespace {}` block): the class body lives declaration-only in a + // header, so there is no class scope on the walk stack. Scope the QN to its class + // (matching the defs extractor) so a call inside the body attributes to the + // Method node instead of the File node. + if (ctx->language == CBM_LANG_CPP || ctx->language == CBM_LANG_CUDA) { + const char *ool = cbm_cpp_out_of_line_method_qn(ctx->arena, node, ctx->source, ctx->project, + ctx->rel_path, name); + if (ool) { + return ool; + } + } + if (state->enclosing_class_qn) { return cbm_arena_sprintf(ctx->arena, "%s.%s", state->enclosing_class_qn, name); } diff --git a/internal/cbm/helpers.c b/internal/cbm/helpers.c index c34be9b7..2af640bf 100644 --- a/internal/cbm/helpers.c +++ b/internal/cbm/helpers.c @@ -807,6 +807,61 @@ static const char *func_node_name(CBMArena *a, TSNode func_node, const char *sou return NULL; } +char *cbm_cpp_out_of_line_parent_class(CBMArena *a, TSNode func_node, const char *source) { + // Descend the declarator chain to its qualified_identifier, if any. + TSNode qid = {0}; + TSNode decl = ts_node_child_by_field_name(func_node, TS_FIELD("declarator")); + for (int depth = 0; depth < CBM_DECLARATOR_DEPTH_LIMIT && !ts_node_is_null(decl); depth++) { + const char *dk = ts_node_type(decl); + if (strcmp(dk, "qualified_identifier") == 0 || strcmp(dk, "scoped_identifier") == 0) { + qid = decl; + break; + } + TSNode inner = ts_node_child_by_field_name(decl, TS_FIELD("declarator")); + if (ts_node_is_null(inner) && ts_node_named_child_count(decl) > 0) { + inner = ts_node_named_child(decl, 0); + } + if (ts_node_is_null(inner)) { + break; + } + decl = inner; + } + if (ts_node_is_null(qid)) { + return NULL; + } + // The qualified_identifier's `scope` is the parent. For a nested scope + // (`ns::Foo`) descend through its `name` field to the innermost segment so + // the direct parent ("Foo") is returned, not the outer namespace. + TSNode scope = ts_node_child_by_field_name(qid, TS_FIELD("scope")); + if (ts_node_is_null(scope)) { + return NULL; + } + for (int depth = 0; depth < CBM_DECLARATOR_DEPTH_LIMIT; depth++) { + const char *sk = ts_node_type(scope); + if (strcmp(sk, "qualified_identifier") != 0 && strcmp(sk, "scoped_identifier") != 0) { + break; + } + TSNode name = ts_node_child_by_field_name(scope, TS_FIELD("name")); + if (ts_node_is_null(name)) { + break; + } + scope = name; + } + char *text = cbm_node_text(a, scope, source); + return (text && text[0]) ? text : NULL; +} + +const char *cbm_cpp_out_of_line_method_qn(CBMArena *a, TSNode func_node, const char *source, + const char *project, const char *rel_path, + const char *name) { + char *scope = cbm_cpp_out_of_line_parent_class(a, func_node, source); + if (!scope || !scope[0]) { + return NULL; + } + const char *class_qn = cbm_fqn_compute(a, project, rel_path, scope); + return cbm_arena_sprintf(a, "%s.%s", class_qn, name); +} + const char *cbm_enclosing_func_qn(CBMArena *a, TSNode node, CBMLanguage lang, const char *source, const char *project, const char *rel_path, const char *module_qn) { @@ -838,6 +893,19 @@ const char *cbm_enclosing_func_qn(CBMArena *a, TSNode node, CBMLanguage lang, co } } + // C++/CUDA out-of-line method definition (`Foo::Bar` in a .cc, with or without + // a surrounding `namespace {}` block): the class body lives declaration-only in + // a header, so there is no enclosing class AST node for the parent walk above to + // find. Reconstruct the class-scoped QN so a call inside the body attributes to + // the Method node instead of falling back to the File node. + if (lang == CBM_LANG_CPP || lang == CBM_LANG_CUDA) { + const char *ool = + cbm_cpp_out_of_line_method_qn(a, func_node, source, project, rel_path, name); + if (ool) { + return ool; + } + } + return cbm_fqn_compute(a, project, rel_path, name); } diff --git a/internal/cbm/helpers.h b/internal/cbm/helpers.h index 35d10892..b51cd3da 100644 --- a/internal/cbm/helpers.h +++ b/internal/cbm/helpers.h @@ -50,6 +50,24 @@ const char *cbm_enclosing_func_qn_cached(CBMExtractCtx *ctx, TSNode node); // enclosing-function attribution — drift between private copies caused #438. TSNode cbm_resolve_c_declarator_name_node(TSNode func_node); +// C++/CUDA: out-of-line method definitions name the function with a qualified +// declarator (`Foo::bar`, or `ns::Foo::bar`). Return the immediate enclosing +// class name (the scope segment directly left of the function name, e.g. "Foo"), +// or NULL when the declarator is unqualified (a plain free function). Shared by +// the defs extractor (to scope the Method node's QN) and the calls extractor (to +// build the matching enclosing-function QN) so a call inside an out-of-line +// definition attributes to the Method rather than the File node (#438 follow-up). +char *cbm_cpp_out_of_line_parent_class(CBMArena *a, TSNode func_node, const char *source); + +// Class-scoped QN (".") for a C++/CUDA out-of-line method +// definition, or NULL when `func_node` is not one (plain free function / inline +// method). `name` is the already-resolved function name. Both call extractors use +// this so a call inside the body attributes to the Method node, matching the QN +// the defs extractor builds from cbm_cpp_out_of_line_parent_class. +const char *cbm_cpp_out_of_line_method_qn(CBMArena *a, TSNode func_node, const char *source, + const char *project, const char *rel_path, + const char *name); + // Find a child node by kind string. TSNode cbm_find_child_by_kind(TSNode parent, const char *kind); diff --git a/tests/test_extraction.c b/tests/test_extraction.c index 9aba9dc7..8be48866 100644 --- a/tests/test_extraction.c +++ b/tests/test_extraction.c @@ -64,6 +64,52 @@ static CBMFileResult *extract(const char *src, CBMLanguage lang, const char *pro return r; } +/* Issue #438 follow-up: a call inside a C++ out-of-line method definition + * (`void Foo::Bar() { Baz(); }`) must attribute to the enclosing Method, whose QN + * the defs extractor scopes to its class (`t.path.Foo.Bar`). The calls extractor + * computes enclosing_func_qn independently, so the two must agree byte-for-byte or + * the pipeline's exact-QN match drops the call to the File node. This must hold + * regardless of any surrounding `namespace {}` block (namespace context is not part + * of the C++ QN scheme). Guards the call-side reconstruction of the class qualifier. */ +TEST(cpp_out_of_line_enclosing_qn) { + struct { + const char *src; + const char *path; + const char *want_enc; /* expected call enclosing_func_qn == Method def qn */ + } cases[] = { + {"using namespace mylib;\nvoid Foo::Bar() { Baz(); }\n", "g.cc", "t.g.Foo.Bar"}, + {"namespace mylib { void Foo::Bar() { Baz(); } }\n", "b.cc", "t.b.Foo.Bar"}, + {"namespace mylib { namespace inner { void Foo::Bar() { Baz(); } } }\n", "n.cc", + "t.n.Foo.Bar"}, + }; + for (int c = 0; c < 3; c++) { + CBMFileResult *r = extract(cases[c].src, CBM_LANG_CPP, "t", cases[c].path); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + /* The out-of-line def is promoted to a class-scoped Method. */ + int saw_method = 0; + for (int i = 0; i < r->defs.count; i++) { + if (strcmp(r->defs.items[i].label, "Method") == 0 && + strcmp(r->defs.items[i].qualified_name, cases[c].want_enc) == 0) { + saw_method = 1; + } + } + ASSERT(saw_method); + /* The Baz() call's enclosing QN matches that Method QN exactly. */ + int saw_baz = 0; + for (int i = 0; i < r->calls.count; i++) { + if (strcmp(r->calls.items[i].callee_name, "Baz") == 0) { + saw_baz = 1; + ASSERT_NOT_NULL(r->calls.items[i].enclosing_func_qn); + ASSERT(strcmp(r->calls.items[i].enclosing_func_qn, cases[c].want_enc) == 0); + } + } + ASSERT(saw_baz); + cbm_free_result(r); + } + PASS(); +} + /* ═══════════════════════════════════════════════════════════════════ * Group A: OOP Languages * ═══════════════════════════════════════════════════════════════════ */ @@ -3117,6 +3163,7 @@ SUITE(extraction) { RUN_TEST(wolfram_call); RUN_TEST(wolfram_caller_attribution); RUN_TEST(c_caller_attribution); + RUN_TEST(cpp_out_of_line_enclosing_qn); RUN_TEST(wolfram_parse); RUN_TEST(wolfram_import); RUN_TEST(wolfram_nested_def); diff --git a/tests/test_pipeline.c b/tests/test_pipeline.c index aca6c0d7..5a42e1ca 100644 --- a/tests/test_pipeline.c +++ b/tests/test_pipeline.c @@ -13,6 +13,7 @@ #include "store/store.h" #include "git/git_context.h" #include "foundation/dump_verify.h" +#include // raw edge-source query for out-of-line call attribution (#438 follow-up) #include #include @@ -97,6 +98,31 @@ static void teardown_test_repo(void) { g_tmpdir[0] = '\0'; } +/* Create an empty temp repo (caller writes files via write_repo_file). */ +static int setup_empty_repo(void) { + snprintf(g_tmpdir, sizeof(g_tmpdir), "/tmp/cbm_test_XXXXXX"); + return cbm_mkdtemp(g_tmpdir) ? 0 : -1; +} + +/* Write one file (relative path under g_tmpdir), creating parent dirs. */ +static int write_repo_file(const char *rel, const char *content) { + char path[700]; + snprintf(path, sizeof(path), "%s/%s", g_tmpdir, rel); + char *slash = strrchr(path, '/'); + if (slash && slash > path + strlen(g_tmpdir)) { + *slash = '\0'; + cbm_mkdir_p(path, 0755); + *slash = '/'; + } + FILE *f = fopen(path, "wb"); + if (!f) { + return -1; + } + fputs(content, f); + fclose(f); + return 0; +} + /* ── Lifecycle tests ─────────────────────────────────────────────── */ TEST(pipeline_create_free) { @@ -907,6 +933,79 @@ TEST(pipeline_incremental_preserves_cross_file_calls) { PASS(); } +/* Return the label of the node that SOURCES the CALLS edge to `callee`, where the + * source node's QN contains `src_stem` (the defining file's stem). Caller-owned + * buffer. Returns -1 if no such CALLS edge, else CBM_STORE_OK with label copied. */ +static int calls_source_label(sqlite3 *db, const char *callee, const char *src_stem, char *out, + size_t out_sz) { + const char *sql = "SELECT src.label FROM edges e " + "JOIN nodes src ON e.source_id = src.id " + "JOIN nodes dst ON e.target_id = dst.id " + "WHERE e.type='CALLS' AND dst.name=?1 " + "AND src.qualified_name LIKE '%' || ?2 || '%' LIMIT 1;"; + sqlite3_stmt *st = NULL; + if (sqlite3_prepare_v2(db, sql, -1, &st, NULL) != SQLITE_OK) { + return -1; + } + sqlite3_bind_text(st, 1, callee, -1, SQLITE_STATIC); + sqlite3_bind_text(st, 2, src_stem, -1, SQLITE_STATIC); + int found = -1; + if (sqlite3_step(st) == SQLITE_ROW) { + const unsigned char *lbl = sqlite3_column_text(st, 0); + snprintf(out, out_sz, "%s", lbl ? (const char *)lbl : ""); + found = 0; + } + sqlite3_finalize(st); + return found; +} + +/* Issue #438 follow-up (surfaced in #463 review): a call inside a C++ out-of-line + * method definition must attribute to the enclosing Method node, not fall back to + * the File node — including when the definition is wrapped in a `namespace {}` + * block. Covers the same call written at global scope (`using namespace`) and + * inside a namespace block; both must source from a Method node after the fix. */ +TEST(pipeline_cpp_out_of_line_call_attribution) { + if (setup_empty_repo() != 0) { + FAIL("failed to create temp dir"); + } + /* Baz has an inline body so it is a graph node the Baz() calls can resolve to; + * BarGlobal/BarBlock are declared here and defined out-of-line in the .cc files. */ + write_repo_file("foo.h", "namespace mylib { class Foo { public:\n" + " void BarGlobal(); void BarBlock(); void Baz() {} }; }\n"); + /* Out-of-line def at global scope via `using namespace`. */ + write_repo_file("foo_global.cc", "#include \"foo.h\"\n" + "using namespace mylib;\n" + "void Foo::BarGlobal() { Baz(); }\n"); + /* Identical out-of-line def, but wrapped in a namespace block (the bug). */ + write_repo_file("foo_block.cc", "#include \"foo.h\"\n" + "namespace mylib { void Foo::BarBlock() { Baz(); } }\n"); + + char db_path[512]; + snprintf(db_path, sizeof(db_path), "%s/test_ool.db", g_tmpdir); + cbm_pipeline_t *p = cbm_pipeline_new(g_tmpdir, db_path, CBM_MODE_FULL); + ASSERT_NOT_NULL(p); + ASSERT_EQ(cbm_pipeline_run(p), 0); + + cbm_store_t *s = cbm_store_open_path(db_path); + ASSERT_NOT_NULL(s); + sqlite3 *db = cbm_store_get_db(s); + + /* Both forms' Baz() calls must source from a Method node, not File/Module. */ + char glob_lbl[64] = {0}, block_lbl[64] = {0}; + int g_ok = calls_source_label(db, "Baz", "foo_global", glob_lbl, sizeof(glob_lbl)); + int b_ok = calls_source_label(db, "Baz", "foo_block", block_lbl, sizeof(block_lbl)); + + ASSERT_EQ(g_ok, 0); + ASSERT_STR_EQ(glob_lbl, "Method"); + ASSERT_EQ(b_ok, 0); + ASSERT_STR_EQ(block_lbl, "Method"); + + cbm_store_close(s); + cbm_pipeline_free(p); + teardown_test_repo(); + PASS(); +} + /* ── Git history pass tests ─────────────────────────────────────── */ TEST(githistory_is_trackable) { @@ -6177,6 +6276,7 @@ SUITE(pipeline) { /* Calls pass */ RUN_TEST(pipeline_calls_resolution); RUN_TEST(pipeline_incremental_preserves_cross_file_calls); + RUN_TEST(pipeline_cpp_out_of_line_call_attribution); /* Git history pass */ RUN_TEST(githistory_is_trackable); RUN_TEST(githistory_compute_coupling);