Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 3 additions & 47 deletions internal/cbm/extract_defs.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
// Tree traversal limits.
enum {
TEMPLATE_DEPTH_LIMIT = 4,
DECLARATOR_DEPTH_LIMIT = 8,
DECLARATOR_DEPTH_LIMIT = CBM_DECLARATOR_DEPTH_LIMIT, // shared define in helpers.h

EXPORT_ANCESTOR_DEPTH = 4,
DECORATOR_SCAN_LIMIT = 3,
C_RETURN_WALK_DEPTH = 5,
Expand Down Expand Up @@ -457,27 +458,6 @@ static TSNode resolve_func_name_fp(TSNode node, CBMLanguage lang, const char *ki
return null_node;
}

// Report whether the node-type string `dk` names one of the leaf kinds that
// terminate a C/C++ declarator walk: a plain or field identifier, an operator
// name, a conversion operator, or a destructor name.
// `dk` must be a valid NUL-terminated string (it is passed straight to strcmp).
static bool is_c_terminal_name(const char *dk) {
    if (strcmp(dk, "identifier") == 0) {
        return true;
    }
    if (strcmp(dk, "field_identifier") == 0) {
        return true;
    }
    if (strcmp(dk, "operator_name") == 0) {
        return true;
    }
    if (strcmp(dk, "operator_cast") == 0) {
        return true;
    }
    if (strcmp(dk, "destructor_name") == 0) {
        return true;
    }
    return false;
}

// Pick the name node out of a C++ qualified_identifier/scoped_identifier.
// Candidate kinds are tried in the listed order (operator and destructor forms
// before plain identifiers); the first kind for which cbm_find_child_by_kind
// yields a non-null node wins. Returns a zero-initialized (null) node when no
// candidate kind matches.
static TSNode resolve_qualified_name(TSNode decl) {
    static const char *candidate_kinds[] = {"operator_name", "operator_cast", "destructor_name",
                                            "identifier", "field_identifier", NULL};
    int idx = 0;
    while (candidate_kinds[idx] != NULL) {
        TSNode match = cbm_find_child_by_kind(decl, candidate_kinds[idx]);
        if (!ts_node_is_null(match)) {
            return match;
        }
        idx++;
    }
    TSNode no_match = {0};
    return no_match;
}

// C++/CUDA: out-of-line method definitions name the function with a qualified
// declarator (`Foo::bar`, or `ns::Foo::bar`). Return the immediate enclosing
// class name (the scope segment directly left of the function name, e.g. "Foo"),
Expand Down Expand Up @@ -528,30 +508,6 @@ static char *cpp_out_of_line_parent_class(CBMArena *a, TSNode node, const char *
return (text && text[0]) ? text : NULL;
}

// Locate the function-name node of a C/C++/CUDA/GLSL definition by descending
// its declarator chain.
//
// Starting at the node's `declarator` field, each step either:
//   - returns the current node when its type is a terminal name kind,
//   - hands a qualified_identifier/scoped_identifier to resolve_qualified_name,
//   - or descends into the nested `declarator` field, falling back to the
//     first named child when that field is absent.
// The walk visits at most DECLARATOR_DEPTH_LIMIT nodes. A zero-initialized
// (null) node is returned when the chain runs out or the limit is reached.
static TSNode resolve_c_declarator_name(TSNode node) {
    const TSNode not_found = {0};
    TSNode current = ts_node_child_by_field_name(node, TS_FIELD("declarator"));
    int level = 0;

    while (level < DECLARATOR_DEPTH_LIMIT && !ts_node_is_null(current)) {
        const char *kind = ts_node_type(current);

        if (is_c_terminal_name(kind)) {
            return current;
        }

        bool is_qualified =
            strcmp(kind, "qualified_identifier") == 0 || strcmp(kind, "scoped_identifier") == 0;
        if (is_qualified) {
            return resolve_qualified_name(current);
        }

        TSNode next = ts_node_child_by_field_name(current, TS_FIELD("declarator"));
        if (ts_node_is_null(next)) {
            // No nested declarator field: fall back to the first named child, if any.
            if (ts_node_named_child_count(current) == 0) {
                return not_found;
            }
            next = ts_node_named_child(current, 0);
            if (ts_node_is_null(next)) {
                return not_found;
            }
        }

        current = next;
        level++;
    }

    return not_found;
}

// R: resolve function_definition name from parent binary_operator lhs.
static TSNode resolve_r_func_name(TSNode node) {
TSNode parent = ts_node_parent(node);
Expand Down Expand Up @@ -675,7 +631,7 @@ static TSNode resolve_func_name_c_family(TSNode *node_ptr, CBMLanguage lang, con
lang == CBM_LANG_GLSL || lang == CBM_LANG_HLSL || lang == CBM_LANG_ISPC ||
lang == CBM_LANG_SLANG) &&
strcmp(kind, "function_definition") == 0) {
return resolve_c_declarator_name(*node_ptr);
return cbm_resolve_c_declarator_name_node(*node_ptr);
}
TSNode null_node = {0};
return null_node;
Expand Down
4 changes: 4 additions & 0 deletions internal/cbm/extract_unified.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ static TSNode resolve_func_name_node(TSNode node) {
if (ts_node_is_null(name_node) && strcmp(ts_node_type(node), "function_declaration") == 0) {
name_node = cbm_find_child_by_kind(node, "simple_identifier");
}
/* C/C++/CUDA/GLSL: function_definition name lives in the declarator chain. */
if (ts_node_is_null(name_node) && strcmp(ts_node_type(node), "function_definition") == 0) {
name_node = cbm_resolve_c_declarator_name_node(node);
}
return name_node;
}

Expand Down
54 changes: 53 additions & 1 deletion internal/cbm/helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -717,7 +717,52 @@ TSNode cbm_find_enclosing_func(TSNode node, CBMLanguage lang) {
return null_node;
}

// Get the name of a function node (basic: try "name" field)
// Return true when `dk` (a tree-sitter node-type string) is one of the leaf
// kinds at which a C/C++ declarator walk stops: identifier, field_identifier,
// operator_name, operator_cast or destructor_name.
// `dk` must be a valid NUL-terminated string (it is passed straight to strcmp).
static bool is_c_terminal_name(const char *dk) {
    static const char *const terminal_kinds[] = {
        "identifier", "field_identifier", "operator_name", "operator_cast", "destructor_name",
    };
    const int kind_count = (int)(sizeof(terminal_kinds) / sizeof(terminal_kinds[0]));

    for (int i = 0; i < kind_count; i++) {
        if (strcmp(dk, terminal_kinds[i]) == 0) {
            return true;
        }
    }
    return false;
}

// Extract the name node from a C++ qualified_identifier/scoped_identifier.
// The kinds below are probed in order via cbm_find_child_by_kind, so operator
// and destructor forms take precedence over plain identifiers. The first
// non-null hit is returned; if nothing matches, the result is a
// zero-initialized (null) node.
static TSNode resolve_qualified_name(TSNode decl) {
    static const char *probe_order[] = {"operator_name", "operator_cast", "destructor_name",
                                        "identifier", "field_identifier", NULL};
    TSNode result = {0};
    for (int i = 0; probe_order[i] != NULL; i++) {
        TSNode candidate = cbm_find_child_by_kind(decl, probe_order[i]);
        if (ts_node_is_null(candidate)) {
            continue;
        }
        result = candidate;
        break;
    }
    return result;
}

// Shared canonical resolver for the function-name node of a C/C++/CUDA/GLSL
// function definition — the header carries the full rationale (#438).
//
// Walks down from func_node's `declarator` field, visiting at most
// CBM_DECLARATOR_DEPTH_LIMIT nodes. At each node:
//   - a terminal name kind is returned as-is;
//   - a qualified_identifier/scoped_identifier is delegated to
//     resolve_qualified_name;
//   - otherwise the walk continues into the nested `declarator` field, or into
//     the first named child when that field is missing.
// Returns a zero-initialized (null) node when no name is reached.
TSNode cbm_resolve_c_declarator_name_node(TSNode func_node) {
    const TSNode none = {0};
    TSNode cur = ts_node_child_by_field_name(func_node, TS_FIELD("declarator"));
    int steps = 0;

    while (steps < CBM_DECLARATOR_DEPTH_LIMIT && !ts_node_is_null(cur)) {
        const char *kind = ts_node_type(cur);

        if (is_c_terminal_name(kind)) {
            return cur;
        }

        bool qualified =
            strcmp(kind, "qualified_identifier") == 0 || strcmp(kind, "scoped_identifier") == 0;
        if (qualified) {
            return resolve_qualified_name(cur);
        }

        TSNode next = ts_node_child_by_field_name(cur, TS_FIELD("declarator"));
        if (ts_node_is_null(next)) {
            // No nested declarator field: try the first named child instead.
            if (ts_node_named_child_count(cur) == 0) {
                return none;
            }
            next = ts_node_named_child(cur, 0);
            if (ts_node_is_null(next)) {
                return none;
            }
        }

        cur = next;
        steps++;
    }

    return none;
}

static const char *func_node_name(CBMArena *a, TSNode func_node, const char *source,
CBMLanguage lang) {
// Wolfram: set_delayed_top/set_top/set_delayed/set — LHS is apply(user_symbol("f"), ...)
Expand Down Expand Up @@ -752,6 +797,13 @@ static const char *func_node_name(CBMArena *a, TSNode func_node, const char *sou
}
}
}
// C/C++/CUDA/GLSL: function_definition carries its name in the declarator chain.
if (strcmp(ts_node_type(func_node), "function_definition") == 0) {
TSNode dn = cbm_resolve_c_declarator_name_node(func_node);
if (!ts_node_is_null(dn)) {
return cbm_node_text(a, dn, source);
}
}
return NULL;
}

Expand Down
14 changes: 14 additions & 0 deletions internal/cbm/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,20 @@ const char *cbm_enclosing_func_qn(CBMArena *a, TSNode node, CBMLanguage lang, co
// Cached version: uses ctx->ef_cache to avoid repeated parent-chain walks.
const char *cbm_enclosing_func_qn_cached(CBMExtractCtx *ctx, TSNode node);

// Max declarator-chain descent depth for C/C++/CUDA/GLSL function-name
// resolution. Single source of truth — extract_defs.c's DECLARATOR_DEPTH_LIMIT
// is derived from this so the three extractors cannot drift.
#define CBM_DECLARATOR_DEPTH_LIMIT 8

// Resolve the function-name node for a C/C++/CUDA/GLSL `function_definition`.
// Such nodes have no `name` field — the name is nested in the declarator chain
// (pointer/function/parenthesized/array declarators wrap it; out-of-line method
// definitions name it with a qualified_identifier). Descends the `declarator`
// field to the innermost name node and returns it, or a null node if none is
// found. Shared by the defs, calls, and unified extractors so all three agree on
// enclosing-function attribution — drift between private copies caused #438.
TSNode cbm_resolve_c_declarator_name_node(TSNode func_node);

// Find a child node by kind string.
TSNode cbm_find_child_by_kind(TSNode parent, const char *kind);

Expand Down
26 changes: 26 additions & 0 deletions tests/test_extraction.c
Original file line number Diff line number Diff line change
Expand Up @@ -1787,6 +1787,31 @@ TEST(wolfram_caller_attribution) {
PASS();
}

/* Issue #438: a C function_definition has no `name` field — the name lives in the
* declarator chain. Calls inside a C function must be attributed to the enclosing
* function, not the module. Pre-fix, enclosing_func_qn fell back to the module QN. */
/* Extracts a two-function C source in which `caller` invokes `helper`, then
 * checks every recorded call whose callee is "helper": its enclosing_func_qn
 * must be set, non-empty, and different from "t.main".
 * NOTE(review): "t" and "main.c" are presumably the project name and file path
 * passed to extract() — confirm against the extract() test helper. */
TEST(c_caller_attribution) {
CBMFileResult *r = extract("int helper(int x) { return x; }\n"
"int caller(void) { return helper(1); }\n",
CBM_LANG_C, "t", "main.c");
ASSERT_NOT_NULL(r);
ASSERT_FALSE(r->has_error);
/* At least one call must have been extracted before inspecting attribution. */
ASSERT_GT(r->calls.count, 0);
/* Tracks whether any call to `helper` was seen, so the loop cannot pass vacuously. */
int saw_helper = 0;
for (int i = 0; i < r->calls.count; i++) {
if (strcmp(r->calls.items[i].callee_name, "helper") == 0) {
saw_helper = 1;
/* enclosing_func_qn must be the function, NOT empty and NOT the module QN. */
ASSERT_NOT_NULL(r->calls.items[i].enclosing_func_qn);
ASSERT_FALSE(strcmp(r->calls.items[i].enclosing_func_qn, "") == 0);
ASSERT_FALSE(strcmp(r->calls.items[i].enclosing_func_qn, "t.main") == 0);
}
}
ASSERT(saw_helper);
cbm_free_result(r);
PASS();
}

/* --- Wolfram parse (simple assignment) --- */
TEST(wolfram_parse) {
CBMFileResult *r = extract("x = 42;\ny = x + 1;\n", CBM_LANG_WOLFRAM, "t", "simple.wl");
Expand Down Expand Up @@ -3091,6 +3116,7 @@ SUITE(extraction) {
RUN_TEST(wolfram_function_extended);
RUN_TEST(wolfram_call);
RUN_TEST(wolfram_caller_attribution);
RUN_TEST(c_caller_attribution);
RUN_TEST(wolfram_parse);
RUN_TEST(wolfram_import);
RUN_TEST(wolfram_nested_def);
Expand Down
Loading