Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 1 addition & 51 deletions internal/cbm/extract_defs.c
Original file line number Diff line number Diff line change
Expand Up @@ -458,56 +458,6 @@ static TSNode resolve_func_name_fp(TSNode node, CBMLanguage lang, const char *ki
return null_node;
}

// C++/CUDA: out-of-line method definitions name the function with a qualified
// declarator (`Foo::bar`, or `ns::Foo::bar`). Return the immediate enclosing
// class name (the scope segment directly left of the function name, e.g. "Foo"),
// or NULL when the declarator is unqualified (a plain free function). Without
// this, an out-of-line definition — whose class body lives declaration-only in a
// header — would be recorded as a free Function with no link to its class.
static char *cpp_out_of_line_parent_class(CBMArena *a, TSNode node, const char *source) {
    // Phase 1: follow the `declarator` chain downwards (bounded by
    // DECLARATOR_DEPTH_LIMIT) until a qualified/scoped identifier shows up.
    TSNode qualified = {0};
    TSNode cur = ts_node_child_by_field_name(node, TS_FIELD("declarator"));
    int steps = 0;
    while (steps < DECLARATOR_DEPTH_LIMIT && !ts_node_is_null(cur)) {
        const char *kind = ts_node_type(cur);
        if (!strcmp(kind, "qualified_identifier") || !strcmp(kind, "scoped_identifier")) {
            qualified = cur;
            break;
        }
        // Prefer the `declarator` field; wrappers without that field are
        // stepped through via their first named child.
        TSNode next = ts_node_child_by_field_name(cur, TS_FIELD("declarator"));
        if (ts_node_is_null(next)) {
            if (ts_node_named_child_count(cur) == 0) {
                break;
            }
            next = ts_node_named_child(cur, 0);
            if (ts_node_is_null(next)) {
                break;
            }
        }
        cur = next;
        steps++;
    }
    if (ts_node_is_null(qualified)) {
        // Unqualified declarator: nothing to report.
        return NULL;
    }

    // Phase 2: take the `scope` field of the qualified name. While that node
    // is itself a qualified/scoped identifier, keep following its `name` field.
    // NOTE(review): this only narrows the result when the grammar nests
    // multi-level qualifiers inside `scope`; if nesting happens inside `name`
    // instead, the outermost segment is what gets returned — confirm against
    // the vendored grammar.
    TSNode parent = ts_node_child_by_field_name(qualified, TS_FIELD("scope"));
    if (ts_node_is_null(parent)) {
        return NULL;
    }
    for (int remaining = DECLARATOR_DEPTH_LIMIT; remaining > 0; remaining--) {
        const char *kind = ts_node_type(parent);
        int nested = strcmp(kind, "qualified_identifier") == 0 ||
                     strcmp(kind, "scoped_identifier") == 0;
        if (!nested) {
            break;
        }
        TSNode leaf = ts_node_child_by_field_name(parent, TS_FIELD("name"));
        if (ts_node_is_null(leaf)) {
            break;
        }
        parent = leaf;
    }

    // An empty or missing text is reported as "no parent".
    char *text = cbm_node_text(a, parent, source);
    if (text == NULL || text[0] == '\0') {
        return NULL;
    }
    return text;
}

// R: resolve function_definition name from parent binary_operator lhs.
static TSNode resolve_r_func_name(TSNode node) {
TSNode parent = ts_node_parent(node);
Expand Down Expand Up @@ -2747,7 +2697,7 @@ static void extract_func_def(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec
// class node QN computed the same way) so DEFINES_METHOD edges resolve.
if ((ctx->language == CBM_LANG_CPP || ctx->language == CBM_LANG_CUDA) &&
strcmp(ts_node_type(node), "function_definition") == 0) {
char *scope_name = cpp_out_of_line_parent_class(a, node, ctx->source);
char *scope_name = cbm_cpp_out_of_line_parent_class(a, node, ctx->source);
if (scope_name && scope_name[0]) {
const char *class_qn = cbm_fqn_compute(a, ctx->project, ctx->rel_path, scope_name);
def.qualified_name = cbm_arena_sprintf(a, "%s.%s", class_qn, name);
Expand Down
13 changes: 13 additions & 0 deletions internal/cbm/extract_unified.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,19 @@ static const char *compute_func_qn(CBMExtractCtx *ctx, TSNode node, const CBMLan
return NULL;
}

// C++/CUDA out-of-line method definition (`Foo::Bar` in a .cc, with or without a
// surrounding `namespace {}` block): the class body lives declaration-only in a
// header, so there is no class scope on the walk stack. Scope the QN to its class
// (matching the defs extractor) so a call inside the body attributes to the
// Method node instead of the File node.
if (ctx->language == CBM_LANG_CPP || ctx->language == CBM_LANG_CUDA) {
const char *ool = cbm_cpp_out_of_line_method_qn(ctx->arena, node, ctx->source, ctx->project,
ctx->rel_path, name);
if (ool) {
return ool;
}
}

if (state->enclosing_class_qn) {
return cbm_arena_sprintf(ctx->arena, "%s.%s", state->enclosing_class_qn, name);
}
Expand Down
68 changes: 68 additions & 0 deletions internal/cbm/helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,61 @@ static const char *func_node_name(CBMArena *a, TSNode func_node, const char *sou
return NULL;
}

// Return the text of the class segment that directly qualifies an out-of-line
// C++/CUDA function definition (`Foo` for `Foo::bar` and for `ns::Foo::bar`),
// or NULL when the declarator carries no qualifier or the segment is empty.
//   a         arena handed to cbm_node_text for the returned string
//   func_node function_definition node whose `declarator` chain is inspected
//   source    source buffer the node offsets refer to
char *cbm_cpp_out_of_line_parent_class(CBMArena *a, TSNode func_node, const char *source) {
    // Descend the declarator chain to its qualified_identifier, if any.
    TSNode qid = {0};
    TSNode decl = ts_node_child_by_field_name(func_node, TS_FIELD("declarator"));
    for (int depth = 0; depth < CBM_DECLARATOR_DEPTH_LIMIT && !ts_node_is_null(decl); depth++) {
        const char *dk = ts_node_type(decl);
        if (strcmp(dk, "qualified_identifier") == 0 || strcmp(dk, "scoped_identifier") == 0) {
            qid = decl;
            break;
        }
        // Wrappers that lack a `declarator` field are stepped through via
        // their first named child.
        TSNode inner = ts_node_child_by_field_name(decl, TS_FIELD("declarator"));
        if (ts_node_is_null(inner) && ts_node_named_child_count(decl) > 0) {
            inner = ts_node_named_child(decl, 0);
        }
        if (ts_node_is_null(inner)) {
            break;
        }
        decl = inner;
    }
    if (ts_node_is_null(qid)) {
        return NULL;
    }

    // Right-nested qualifiers: when `ns::Foo::bar` is represented as
    // qid(scope: ns, name: qid(scope: Foo, name: bar)), the direct parent lives
    // on the innermost qualified node. Follow `name` while it is itself
    // qualified so the scope read below is "Foo" rather than the outer "ns".
    for (int depth = 0; depth < CBM_DECLARATOR_DEPTH_LIMIT; depth++) {
        TSNode nested = ts_node_child_by_field_name(qid, TS_FIELD("name"));
        if (ts_node_is_null(nested)) {
            break;
        }
        const char *nk = ts_node_type(nested);
        if (strcmp(nk, "qualified_identifier") != 0 && strcmp(nk, "scoped_identifier") != 0) {
            break;
        }
        qid = nested;
    }

    // The (innermost) qualified_identifier's `scope` is the parent.
    TSNode scope = ts_node_child_by_field_name(qid, TS_FIELD("scope"));
    if (ts_node_is_null(scope)) {
        return NULL;
    }
    // Left-nested qualifiers: if the scope is itself a qualified node
    // (`ns::Foo` stored under `scope`), descend through its `name` field to the
    // innermost segment so the direct parent is returned, not the namespace.
    for (int depth = 0; depth < CBM_DECLARATOR_DEPTH_LIMIT; depth++) {
        const char *sk = ts_node_type(scope);
        if (strcmp(sk, "qualified_identifier") != 0 && strcmp(sk, "scoped_identifier") != 0) {
            break;
        }
        TSNode name = ts_node_child_by_field_name(scope, TS_FIELD("name"));
        if (ts_node_is_null(name)) {
            break;
        }
        scope = name;
    }
    char *text = cbm_node_text(a, scope, source);
    return (text && text[0]) ? text : NULL;
}

// Compose "<classQN>.<name>" for a function definition whose declarator is
// class-qualified. Yields NULL when cbm_cpp_out_of_line_parent_class finds no
// (or an empty) parent class for `func_node`.
const char *cbm_cpp_out_of_line_method_qn(CBMArena *a, TSNode func_node, const char *source,
                                          const char *project, const char *rel_path,
                                          const char *name) {
    char *parent_class = cbm_cpp_out_of_line_parent_class(a, func_node, source);
    const int has_parent = parent_class != NULL && parent_class[0] != '\0';
    if (has_parent) {
        // Qualify the class the same way cbm_fqn_compute does for any other
        // symbol in this file, then append the method name.
        const char *owner_qn = cbm_fqn_compute(a, project, rel_path, parent_class);
        return cbm_arena_sprintf(a, "%s.%s", owner_qn, name);
    }
    return NULL;
}

const char *cbm_enclosing_func_qn(CBMArena *a, TSNode node, CBMLanguage lang, const char *source,
const char *project, const char *rel_path,
const char *module_qn) {
Expand Down Expand Up @@ -838,6 +893,19 @@ const char *cbm_enclosing_func_qn(CBMArena *a, TSNode node, CBMLanguage lang, co
}
}

// C++/CUDA out-of-line method definition (`Foo::Bar` in a .cc, with or without
// a surrounding `namespace {}` block): the class body lives declaration-only in
// a header, so there is no enclosing class AST node for the parent walk above to
// find. Reconstruct the class-scoped QN so a call inside the body attributes to
// the Method node instead of falling back to the File node.
if (lang == CBM_LANG_CPP || lang == CBM_LANG_CUDA) {
const char *ool =
cbm_cpp_out_of_line_method_qn(a, func_node, source, project, rel_path, name);
if (ool) {
return ool;
}
}

return cbm_fqn_compute(a, project, rel_path, name);
}

Expand Down
18 changes: 18 additions & 0 deletions internal/cbm/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,24 @@ const char *cbm_enclosing_func_qn_cached(CBMExtractCtx *ctx, TSNode node);
// enclosing-function attribution — drift between private copies caused #438.
TSNode cbm_resolve_c_declarator_name_node(TSNode func_node);

// C++/CUDA: out-of-line method definitions name the function with a qualified
// declarator (`Foo::bar`, or `ns::Foo::bar`). Return the immediate enclosing
// class name (the scope segment directly left of the function name, e.g. "Foo"),
// or NULL when the declarator is unqualified (a plain free function). Shared by
// the defs extractor (to scope the Method node's QN) and the calls extractor (to
// build the matching enclosing-function QN) so a call inside an out-of-line
// definition attributes to the Method rather than the File node (#438 follow-up).
char *cbm_cpp_out_of_line_parent_class(CBMArena *a, TSNode func_node, const char *source);

// Class-scoped QN ("<classQN>.<name>") for a C++/CUDA out-of-line method
// definition, or NULL when `func_node` is not one (plain free function / inline
// method). `name` is the already-resolved function name. Both call extractors use
// this so a call inside the body attributes to the Method node, matching the QN
// the defs extractor builds from cbm_cpp_out_of_line_parent_class.
const char *cbm_cpp_out_of_line_method_qn(CBMArena *a, TSNode func_node, const char *source,
const char *project, const char *rel_path,
const char *name);

// Find a child node by kind string.
TSNode cbm_find_child_by_kind(TSNode parent, const char *kind);

Expand Down
47 changes: 47 additions & 0 deletions tests/test_extraction.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,52 @@ static CBMFileResult *extract(const char *src, CBMLanguage lang, const char *pro
return r;
}

/* Issue #438 follow-up: a call inside a C++ out-of-line method definition
* (`void Foo::Bar() { Baz(); }`) must attribute to the enclosing Method, whose QN
* the defs extractor scopes to its class (`t.path.Foo.Bar`). The calls extractor
* computes enclosing_func_qn independently, so the two must agree byte-for-byte or
* the pipeline's exact-QN match drops the call to the File node. This must hold
* regardless of any surrounding `namespace {}` block (namespace context is not part
* of the C++ QN scheme). Guards the call-side reconstruction of the class qualifier. */
TEST(cpp_out_of_line_enclosing_qn) {
    /* Each case: one C++ source, the relative path it is extracted under, and
     * the QN expected both for the Method def and for the Baz() call's
     * enclosing function. */
    struct {
        const char *src;
        const char *path;
        const char *want_enc; /* expected call enclosing_func_qn == Method def qn */
    } cases[] = {
        {"using namespace mylib;\nvoid Foo::Bar() { Baz(); }\n", "g.cc", "t.g.Foo.Bar"},
        {"namespace mylib { void Foo::Bar() { Baz(); } }\n", "b.cc", "t.b.Foo.Bar"},
        {"namespace mylib { namespace inner { void Foo::Bar() { Baz(); } } }\n", "n.cc",
         "t.n.Foo.Bar"},
    };
    /* Derive the bound from the table so a newly added case is never skipped. */
    const int case_count = (int)(sizeof(cases) / sizeof(cases[0]));
    for (int c = 0; c < case_count; c++) {
        CBMFileResult *r = extract(cases[c].src, CBM_LANG_CPP, "t", cases[c].path);
        ASSERT_NOT_NULL(r);
        ASSERT_FALSE(r->has_error);
        /* The out-of-line def is promoted to a class-scoped Method. */
        int saw_method = 0;
        for (int i = 0; i < r->defs.count; i++) {
            if (strcmp(r->defs.items[i].label, "Method") == 0 &&
                strcmp(r->defs.items[i].qualified_name, cases[c].want_enc) == 0) {
                saw_method = 1;
            }
        }
        ASSERT(saw_method);
        /* The Baz() call's enclosing QN matches that Method QN exactly. */
        int saw_baz = 0;
        for (int i = 0; i < r->calls.count; i++) {
            if (strcmp(r->calls.items[i].callee_name, "Baz") == 0) {
                saw_baz = 1;
                ASSERT_NOT_NULL(r->calls.items[i].enclosing_func_qn);
                ASSERT(strcmp(r->calls.items[i].enclosing_func_qn, cases[c].want_enc) == 0);
            }
        }
        ASSERT(saw_baz);
        cbm_free_result(r);
    }
    PASS();
}

/* ═══════════════════════════════════════════════════════════════════
* Group A: OOP Languages
* ═══════════════════════════════════════════════════════════════════ */
Expand Down Expand Up @@ -3117,6 +3163,7 @@ SUITE(extraction) {
RUN_TEST(wolfram_call);
RUN_TEST(wolfram_caller_attribution);
RUN_TEST(c_caller_attribution);
RUN_TEST(cpp_out_of_line_enclosing_qn);
RUN_TEST(wolfram_parse);
RUN_TEST(wolfram_import);
RUN_TEST(wolfram_nested_def);
Expand Down
100 changes: 100 additions & 0 deletions tests/test_pipeline.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "store/store.h"
#include "git/git_context.h"
#include "foundation/dump_verify.h"
#include <sqlite3.h> // raw edge-source query for out-of-line call attribution (#438 follow-up)

#include <stdlib.h>
#include <string.h>
Expand Down Expand Up @@ -97,6 +98,31 @@ static void teardown_test_repo(void) {
g_tmpdir[0] = '\0';
}

/* Point g_tmpdir at a freshly created, empty temporary directory. Files are
 * added afterwards with write_repo_file. Returns 0 on success, -1 when
 * cbm_mkdtemp reports failure. */
static int setup_empty_repo(void) {
    snprintf(g_tmpdir, sizeof(g_tmpdir), "/tmp/cbm_test_XXXXXX");
    if (!cbm_mkdtemp(g_tmpdir)) {
        return -1;
    }
    return 0;
}

/* Write `content` to the file `rel` (a path relative to g_tmpdir), creating
 * any parent directories first.
 *
 * Returns 0 when the whole file was written and closed cleanly, -1 when the
 * combined path does not fit the local buffer, the file cannot be opened, or
 * writing/closing it fails. */
static int write_repo_file(const char *rel, const char *content) {
    char path[700];
    int len = snprintf(path, sizeof(path), "%s/%s", g_tmpdir, rel);
    if (len < 0 || (size_t)len >= sizeof(path)) {
        /* A truncated path would silently target the wrong file. */
        return -1;
    }
    /* Only create directories when `rel` has a directory component, i.e. the
     * last '/' lies beyond the separator that follows g_tmpdir. */
    char *slash = strrchr(path, '/');
    if (slash && slash > path + strlen(g_tmpdir)) {
        *slash = '\0';
        /* Result not inspected here: if the directory is missing, the fopen
         * below fails and reports the error. */
        cbm_mkdir_p(path, 0755);
        *slash = '/';
    }
    FILE *f = fopen(path, "wb");
    if (!f) {
        return -1;
    }
    int rc = 0;
    if (fputs(content, f) == EOF) {
        rc = -1;
    }
    /* fclose flushes buffered data, so a failure here is also a write failure. */
    if (fclose(f) != 0) {
        rc = -1;
    }
    return rc;
}

/* ── Lifecycle tests ─────────────────────────────────────────────── */

TEST(pipeline_create_free) {
Expand Down Expand Up @@ -907,6 +933,79 @@ TEST(pipeline_incremental_preserves_cross_file_calls) {
PASS();
}

/* Look up one CALLS edge whose target node is named `callee` and whose source
 * node's qualified name matches the LIKE pattern '%<src_stem>%', and copy that
 * source node's label into `out` (at most `out_sz` bytes, NUL-terminated by
 * snprintf). A NULL label column is copied as the empty string.
 *
 * Returns 0 when a row was found, -1 when the statement cannot be prepared or
 * no row matches; `out` is left untouched in the -1 case. */
static int calls_source_label(sqlite3 *db, const char *callee, const char *src_stem, char *out,
                              size_t out_sz) {
    const char *const query = "SELECT src.label FROM edges e "
                              "JOIN nodes src ON e.source_id = src.id "
                              "JOIN nodes dst ON e.target_id = dst.id "
                              "WHERE e.type='CALLS' AND dst.name=?1 "
                              "AND src.qualified_name LIKE '%' || ?2 || '%' LIMIT 1;";
    sqlite3_stmt *stmt = NULL;
    int prepared = sqlite3_prepare_v2(db, query, -1, &stmt, NULL);
    if (prepared != SQLITE_OK) {
        return -1;
    }

    /* ?1 = callee name, ?2 = substring the source QN must contain. */
    sqlite3_bind_text(stmt, 1, callee, -1, SQLITE_STATIC);
    sqlite3_bind_text(stmt, 2, src_stem, -1, SQLITE_STATIC);

    int status = -1;
    if (sqlite3_step(stmt) == SQLITE_ROW) {
        const char *label = (const char *)sqlite3_column_text(stmt, 0);
        if (label == NULL) {
            label = "";
        }
        snprintf(out, out_sz, "%s", label);
        status = 0;
    }
    sqlite3_finalize(stmt);
    return status;
}

/* Issue #438 follow-up (surfaced in #463 review): a call inside a C++ out-of-line
* method definition must attribute to the enclosing Method node, not fall back to
* the File node — including when the definition is wrapped in a `namespace {}`
* block. Covers the same call written at global scope (`using namespace`) and
* inside a namespace block; both must source from a Method node after the fix. */
TEST(pipeline_cpp_out_of_line_call_attribution) {
    if (setup_empty_repo() != 0) {
        FAIL("failed to create temp dir");
    }
    /* Baz has an inline body so it is a graph node the Baz() calls can resolve to;
     * BarGlobal/BarBlock are declared here and defined out-of-line in the .cc files. */
    int wrote_header = write_repo_file("foo.h", "namespace mylib { class Foo { public:\n"
                                                "  void BarGlobal(); void BarBlock(); void Baz() {} }; }\n");
    /* Out-of-line def at global scope via `using namespace`. */
    int wrote_global = write_repo_file("foo_global.cc", "#include \"foo.h\"\n"
                                                        "using namespace mylib;\n"
                                                        "void Foo::BarGlobal() { Baz(); }\n");
    /* Identical out-of-line def, but wrapped in a namespace block (the bug). */
    int wrote_block =
        write_repo_file("foo_block.cc", "#include \"foo.h\"\n"
                                        "namespace mylib { void Foo::BarBlock() { Baz(); } }\n");
    /* Fail here if a fixture could not be written, rather than later with a
     * misleading "no CALLS edge" assertion. */
    ASSERT_EQ(wrote_header, 0);
    ASSERT_EQ(wrote_global, 0);
    ASSERT_EQ(wrote_block, 0);

    char db_path[512];
    snprintf(db_path, sizeof(db_path), "%s/test_ool.db", g_tmpdir);
    cbm_pipeline_t *p = cbm_pipeline_new(g_tmpdir, db_path, CBM_MODE_FULL);
    ASSERT_NOT_NULL(p);
    ASSERT_EQ(cbm_pipeline_run(p), 0);

    cbm_store_t *s = cbm_store_open_path(db_path);
    ASSERT_NOT_NULL(s);
    sqlite3 *db = cbm_store_get_db(s);

    /* Both forms' Baz() calls must source from a Method node, not File/Module. */
    char glob_lbl[64] = {0}, block_lbl[64] = {0};
    int g_ok = calls_source_label(db, "Baz", "foo_global", glob_lbl, sizeof(glob_lbl));
    int b_ok = calls_source_label(db, "Baz", "foo_block", block_lbl, sizeof(block_lbl));

    ASSERT_EQ(g_ok, 0);
    ASSERT_STR_EQ(glob_lbl, "Method");
    ASSERT_EQ(b_ok, 0);
    ASSERT_STR_EQ(block_lbl, "Method");

    cbm_store_close(s);
    cbm_pipeline_free(p);
    teardown_test_repo();
    PASS();
}

/* ── Git history pass tests ─────────────────────────────────────── */

TEST(githistory_is_trackable) {
Expand Down Expand Up @@ -6177,6 +6276,7 @@ SUITE(pipeline) {
/* Calls pass */
RUN_TEST(pipeline_calls_resolution);
RUN_TEST(pipeline_incremental_preserves_cross_file_calls);
RUN_TEST(pipeline_cpp_out_of_line_call_attribution);
/* Git history pass */
RUN_TEST(githistory_is_trackable);
RUN_TEST(githistory_compute_coupling);
Expand Down