diff --git a/internal/cbm/extract_defs.c b/internal/cbm/extract_defs.c index bfff34fb..63e77b21 100644 --- a/internal/cbm/extract_defs.c +++ b/internal/cbm/extract_defs.c @@ -23,7 +23,8 @@ // Tree traversal limits. enum { TEMPLATE_DEPTH_LIMIT = 4, - DECLARATOR_DEPTH_LIMIT = 8, + DECLARATOR_DEPTH_LIMIT = CBM_DECLARATOR_DEPTH_LIMIT, // shared define in helpers.h + EXPORT_ANCESTOR_DEPTH = 4, DECORATOR_SCAN_LIMIT = 3, C_RETURN_WALK_DEPTH = 5, @@ -457,27 +458,6 @@ static TSNode resolve_func_name_fp(TSNode node, CBMLanguage lang, const char *ki return null_node; } -// Check if a node type is a terminal C declarator name. -static bool is_c_terminal_name(const char *dk) { - return strcmp(dk, "identifier") == 0 || strcmp(dk, "field_identifier") == 0 || - strcmp(dk, "operator_name") == 0 || strcmp(dk, "operator_cast") == 0 || - strcmp(dk, "destructor_name") == 0; -} - -// Resolve name from a C++ qualified_identifier/scoped_identifier. -static TSNode resolve_qualified_name(TSNode decl) { - static const char *name_kinds[] = {"operator_name", "operator_cast", "destructor_name", - "identifier", "field_identifier", NULL}; - for (const char **k = name_kinds; *k; k++) { - TSNode found = cbm_find_child_by_kind(decl, *k); - if (!ts_node_is_null(found)) { - return found; - } - } - TSNode null_node = {0}; - return null_node; -} - // C++/CUDA: out-of-line method definitions name the function with a qualified // declarator (`Foo::bar`, or `ns::Foo::bar`). Return the immediate enclosing // class name (the scope segment directly left of the function name, e.g. "Foo"), @@ -528,30 +508,6 @@ static char *cpp_out_of_line_parent_class(CBMArena *a, TSNode node, const char * return (text && text[0]) ? text : NULL; } -// Resolve function name from C/C++/CUDA/GLSL declarator chain. 
-static TSNode resolve_c_declarator_name(TSNode node) { - TSNode decl = ts_node_child_by_field_name(node, TS_FIELD("declarator")); - for (int depth = 0; depth < DECLARATOR_DEPTH_LIMIT && !ts_node_is_null(decl); depth++) { - const char *dk = ts_node_type(decl); - if (is_c_terminal_name(dk)) { - return decl; - } - if (strcmp(dk, "qualified_identifier") == 0 || strcmp(dk, "scoped_identifier") == 0) { - return resolve_qualified_name(decl); - } - TSNode inner = ts_node_child_by_field_name(decl, TS_FIELD("declarator")); - if (ts_node_is_null(inner) && ts_node_named_child_count(decl) > 0) { - inner = ts_node_named_child(decl, 0); - } - if (ts_node_is_null(inner)) { - break; - } - decl = inner; - } - TSNode null_node = {0}; - return null_node; -} - // R: resolve function_definition name from parent binary_operator lhs. static TSNode resolve_r_func_name(TSNode node) { TSNode parent = ts_node_parent(node); @@ -675,7 +631,7 @@ static TSNode resolve_func_name_c_family(TSNode *node_ptr, CBMLanguage lang, con lang == CBM_LANG_GLSL || lang == CBM_LANG_HLSL || lang == CBM_LANG_ISPC || lang == CBM_LANG_SLANG) && strcmp(kind, "function_definition") == 0) { - return resolve_c_declarator_name(*node_ptr); + return cbm_resolve_c_declarator_name_node(*node_ptr); } TSNode null_node = {0}; return null_node; diff --git a/internal/cbm/extract_unified.c b/internal/cbm/extract_unified.c index 7274158f..f65a64be 100644 --- a/internal/cbm/extract_unified.c +++ b/internal/cbm/extract_unified.c @@ -101,6 +101,10 @@ static TSNode resolve_func_name_node(TSNode node) { if (ts_node_is_null(name_node) && strcmp(ts_node_type(node), "function_declaration") == 0) { name_node = cbm_find_child_by_kind(node, "simple_identifier"); } + /* C/C++/CUDA/GLSL: function_definition name lives in the declarator chain. 
*/ + if (ts_node_is_null(name_node) && strcmp(ts_node_type(node), "function_definition") == 0) { + name_node = cbm_resolve_c_declarator_name_node(node); + } return name_node; } diff --git a/internal/cbm/helpers.c b/internal/cbm/helpers.c index 1efa6b81..c34be9b7 100644 --- a/internal/cbm/helpers.c +++ b/internal/cbm/helpers.c @@ -717,7 +717,53 @@ TSNode cbm_find_enclosing_func(TSNode node, CBMLanguage lang) { return null_node; } -// Get the name of a function node (basic: try "name" field) +// Check if a node type is a terminal C declarator name. +static bool is_c_terminal_name(const char *dk) { + return strcmp(dk, "identifier") == 0 || strcmp(dk, "field_identifier") == 0 || + strcmp(dk, "operator_name") == 0 || strcmp(dk, "operator_cast") == 0 || + strcmp(dk, "destructor_name") == 0; +} + +// Resolve name from a C++ qualified_identifier/scoped_identifier. +static TSNode resolve_qualified_name(TSNode decl) { + static const char *name_kinds[] = {"operator_name", "operator_cast", "destructor_name", + "identifier", "field_identifier", NULL}; + for (const char **k = name_kinds; *k; k++) { + TSNode found = cbm_find_child_by_kind(decl, *k); + if (!ts_node_is_null(found)) { + return found; + } + } + TSNode null_node = {0}; + return null_node; +} + +// Resolve function name from C/C++/CUDA/GLSL declarator chain. Shared canonical +// implementation — see the header for the full rationale (#438). 
+TSNode cbm_resolve_c_declarator_name_node(TSNode func_node) { + TSNode decl = ts_node_child_by_field_name(func_node, TS_FIELD("declarator")); + for (int depth = 0; depth < CBM_DECLARATOR_DEPTH_LIMIT && !ts_node_is_null(decl); depth++) { + const char *dk = ts_node_type(decl); + if (is_c_terminal_name(dk)) { + return decl; + } + if (strcmp(dk, "qualified_identifier") == 0 || strcmp(dk, "scoped_identifier") == 0) { + return resolve_qualified_name(decl); + } + TSNode inner = ts_node_child_by_field_name(decl, TS_FIELD("declarator")); + if (ts_node_is_null(inner) && ts_node_named_child_count(decl) > 0) { + inner = ts_node_named_child(decl, 0); + } + if (ts_node_is_null(inner)) { + break; + } + decl = inner; + } + TSNode null_node = {0}; + return null_node; +} + +// Get the name of a function node ("name" field, plus per-language special cases) static const char *func_node_name(CBMArena *a, TSNode func_node, const char *source, CBMLanguage lang) { // Wolfram: set_delayed_top/set_top/set_delayed/set — LHS is apply(user_symbol("f"), ...) @@ -752,6 +798,13 @@ static const char *func_node_name(CBMArena *a, TSNode func_node, const char *sou } } } + // C/C++/CUDA/GLSL: function_definition carries its name in the declarator chain. + if (strcmp(ts_node_type(func_node), "function_definition") == 0) { + TSNode dn = cbm_resolve_c_declarator_name_node(func_node); + if (!ts_node_is_null(dn)) { + return cbm_node_text(a, dn, source); + } + } return NULL; } diff --git a/internal/cbm/helpers.h b/internal/cbm/helpers.h index ea8154d4..35d10892 100644 --- a/internal/cbm/helpers.h +++ b/internal/cbm/helpers.h @@ -36,6 +36,20 @@ const char *cbm_enclosing_func_qn(CBMArena *a, TSNode node, CBMLanguage lang, co // Cached version: uses ctx->ef_cache to avoid repeated parent-chain walks. const char *cbm_enclosing_func_qn_cached(CBMExtractCtx *ctx, TSNode node); +// Max declarator-chain descent depth for C/C++/CUDA/GLSL function-name +// resolution. Single source of truth — extract_defs.c's DECLARATOR_DEPTH_LIMIT +// is derived from this so the three extractors cannot drift. 
+#define CBM_DECLARATOR_DEPTH_LIMIT 8 + +// Resolve the function-name node for a C/C++/CUDA/GLSL `function_definition`. +// Such nodes have no `name` field — the name is nested in the declarator chain +// (pointer/function/parenthesized/array declarators wrap it; out-of-line method +// definitions name it with a qualified_identifier). Descends the `declarator` +// field to the innermost name node and returns it, or a null node if none is +// found. Shared by the defs, calls, and unified extractors so all three agree on +// enclosing-function attribution — drift between private copies caused #438. +TSNode cbm_resolve_c_declarator_name_node(TSNode func_node); + // Find a child node by kind string. TSNode cbm_find_child_by_kind(TSNode parent, const char *kind); diff --git a/tests/test_extraction.c b/tests/test_extraction.c index d06b2a50..9aba9dc7 100644 --- a/tests/test_extraction.c +++ b/tests/test_extraction.c @@ -1787,6 +1787,33 @@ TEST(wolfram_caller_attribution) { PASS(); } +/* Issue #438: a C function_definition has no `name` field — the name lives in the + * declarator chain. Calls inside a C function must be attributed to the enclosing + * function, not the module. Pre-fix, enclosing_func_qn fell back to the module QN. 
*/ + ASSERT_NOT_NULL(r->calls.items[i].enclosing_func_qn); + ASSERT_FALSE(strcmp(r->calls.items[i].enclosing_func_qn, "") == 0); + ASSERT_FALSE(strcmp(r->calls.items[i].enclosing_func_qn, "t.main") == 0); + /* Positive check: the QN must name `caller`, not merely differ from the module QN. */ + ASSERT(strstr(r->calls.items[i].enclosing_func_qn, "caller") != NULL); + } + } + ASSERT(saw_helper); + cbm_free_result(r); + PASS(); +} + /* --- Wolfram parse (simple assignment) --- */ TEST(wolfram_parse) { CBMFileResult *r = extract("x = 42;\ny = x + 1;\n", CBM_LANG_WOLFRAM, "t", "simple.wl"); @@ -3091,6 +3118,7 @@ SUITE(extraction) { RUN_TEST(wolfram_function_extended); RUN_TEST(wolfram_call); RUN_TEST(wolfram_caller_attribution); + RUN_TEST(c_caller_attribution); RUN_TEST(wolfram_parse); RUN_TEST(wolfram_import); RUN_TEST(wolfram_nested_def);