From df8fa160fcaf21c077847b57a07362cfe942b34b Mon Sep 17 00:00:00 2001
From: Kris Kersey
Date: Sun, 14 Jun 2026 15:06:05 +0000
Subject: [PATCH] fix(extract): attribute C/C++ CALLS edges to the enclosing function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A CALLS edge whose caller is a C/C++/CUDA/GLSL function was sourced to
the file's Module node instead of the calling Function. "Find callers of
X" returned a file path, outbound trace_path returned empty, and
(:Function)-[:CALLS]->(:Function) queries missed for these languages.

Root cause: the enclosing-function resolvers read only tree-sitter's
`name` field, but a `function_definition` node has none — the name lives
in the declarator chain (pointer/function/parenthesized/array
declarators). So func_node_name() (internal/cbm/helpers.c) and
resolve_func_name_node() (internal/cbm/extract_unified.c) returned NULL,
the enclosing scope fell back to the module QN, and the edge was
attributed to the Module node. This is the C counterpart to #220, which
fixed the definition-naming path but not the enclosing-call path.

Fix: descend the declarator chain to the innermost name node (mirroring
resolve_c_declarator_name in extract_defs.c, including qualified and
operator names) when a function_definition lacks a `name` field.

Adds the regression test c_caller_attribution asserting a C call's
enclosing_func_qn is the function, not the module.
Fixes #438 Signed-off-by: Kris Kersey --- internal/cbm/extract_unified.c | 42 ++++++++++++++++++++++++++++++ internal/cbm/helpers.c | 47 ++++++++++++++++++++++++++++++++++ tests/test_extraction.c | 26 +++++++++++++++++++ 3 files changed, 115 insertions(+) diff --git a/internal/cbm/extract_unified.c b/internal/cbm/extract_unified.c index 7274158f..7c457fcc 100644 --- a/internal/cbm/extract_unified.c +++ b/internal/cbm/extract_unified.c @@ -87,6 +87,44 @@ static const char *compute_wolfram_func_qn(CBMExtractCtx *ctx, TSNode node) { return NULL; } +/* C/C++/CUDA/GLSL: function_definition has no `name` field — the name is nested + * in the declarator chain. Descend the `declarator` field to the innermost name + * node. Without this, the enclosing-function scope for calls made inside a C + * function resolves to NULL and the call is attributed to the module rather than + * the function (issue #438). Mirrors resolve_c_declarator_name() in extract_defs.c. */ +#ifndef CBM_DECLARATOR_DEPTH_LIMIT +#define CBM_DECLARATOR_DEPTH_LIMIT 8 /* matches DECLARATOR_DEPTH_LIMIT in extract_defs.c */ +#endif +static TSNode resolve_c_declarator_name_node(TSNode node) { + TSNode decl = ts_node_child_by_field_name(node, TS_FIELD("declarator")); + for (int depth = 0; depth < CBM_DECLARATOR_DEPTH_LIMIT && !ts_node_is_null(decl); depth++) { + const char *dk = ts_node_type(decl); + if (strcmp(dk, "identifier") == 0 || strcmp(dk, "field_identifier") == 0 || + strcmp(dk, "type_identifier") == 0 || strcmp(dk, "destructor_name") == 0 || + strcmp(dk, "operator_name") == 0 || strcmp(dk, "operator_cast") == 0) { + return decl; + } + if (strcmp(dk, "qualified_identifier") == 0 || strcmp(dk, "scoped_identifier") == 0) { + TSNode nm = ts_node_child_by_field_name(decl, TS_FIELD("name")); + if (!ts_node_is_null(nm)) { + decl = nm; + continue; + } + return decl; + } + TSNode inner = ts_node_child_by_field_name(decl, TS_FIELD("declarator")); + if (ts_node_is_null(inner) && ts_node_named_child_count(decl) > 
0) { + inner = ts_node_named_child(decl, 0); + } + if (ts_node_is_null(inner)) { + break; + } + decl = inner; + } + TSNode null_node = {0}; + return null_node; +} + // Resolve the name node for a function, handling arrow functions. static TSNode resolve_func_name_node(TSNode node) { TSNode name_node = ts_node_child_by_field_name(node, TS_FIELD("name")); @@ -101,6 +139,10 @@ static TSNode resolve_func_name_node(TSNode node) { if (ts_node_is_null(name_node) && strcmp(ts_node_type(node), "function_declaration") == 0) { name_node = cbm_find_child_by_kind(node, "simple_identifier"); } + /* C/C++/CUDA/GLSL: function_definition name lives in the declarator chain. */ + if (ts_node_is_null(name_node) && strcmp(ts_node_type(node), "function_definition") == 0) { + name_node = resolve_c_declarator_name_node(node); + } return name_node; } diff --git a/internal/cbm/helpers.c b/internal/cbm/helpers.c index 1efa6b81..fc67f0ae 100644 --- a/internal/cbm/helpers.c +++ b/internal/cbm/helpers.c @@ -718,6 +718,46 @@ TSNode cbm_find_enclosing_func(TSNode node, CBMLanguage lang) { } // Get the name of a function node (basic: try "name" field) +// C/C++/CUDA/GLSL: function_definition has no "name" field — the function name is +// nested in the declarator chain (pointer/function/parenthesized/array +// declarators wrap it). Descend the `declarator` field to the innermost name +// node. Without this, calls made inside C functions are attributed to the module +// rather than the enclosing function (issue #438). Mirrors resolve_c_declarator_name() +// in extract_defs.c. 
+#ifndef CBM_DECLARATOR_DEPTH_LIMIT +#define CBM_DECLARATOR_DEPTH_LIMIT 8 /* matches DECLARATOR_DEPTH_LIMIT in extract_defs.c */ +#endif +static TSNode c_declarator_name_node(TSNode func_node) { + TSNode decl = ts_node_child_by_field_name(func_node, TS_FIELD("declarator")); + for (int depth = 0; depth < CBM_DECLARATOR_DEPTH_LIMIT && !ts_node_is_null(decl); depth++) { + const char *dk = ts_node_type(decl); + if (strcmp(dk, "identifier") == 0 || strcmp(dk, "field_identifier") == 0 || + strcmp(dk, "type_identifier") == 0 || strcmp(dk, "destructor_name") == 0 || + strcmp(dk, "operator_name") == 0 || strcmp(dk, "operator_cast") == 0) { + return decl; + } + if (strcmp(dk, "qualified_identifier") == 0 || strcmp(dk, "scoped_identifier") == 0) { + // out-of-line method def (Foo::bar): take the rightmost name segment + TSNode nm = ts_node_child_by_field_name(decl, TS_FIELD("name")); + if (!ts_node_is_null(nm)) { + decl = nm; + continue; + } + return decl; + } + TSNode inner = ts_node_child_by_field_name(decl, TS_FIELD("declarator")); + if (ts_node_is_null(inner) && ts_node_named_child_count(decl) > 0) { + inner = ts_node_named_child(decl, 0); + } + if (ts_node_is_null(inner)) { + break; + } + decl = inner; + } + TSNode null_node = {0}; + return null_node; +} + static const char *func_node_name(CBMArena *a, TSNode func_node, const char *source, CBMLanguage lang) { // Wolfram: set_delayed_top/set_top/set_delayed/set — LHS is apply(user_symbol("f"), ...) @@ -752,6 +792,13 @@ static const char *func_node_name(CBMArena *a, TSNode func_node, const char *sou } } } + // C/C++/CUDA/GLSL: function_definition carries its name in the declarator chain. 
+ if (strcmp(ts_node_type(func_node), "function_definition") == 0) { + TSNode dn = c_declarator_name_node(func_node); + if (!ts_node_is_null(dn)) { + return cbm_node_text(a, dn, source); + } + } return NULL; } diff --git a/tests/test_extraction.c b/tests/test_extraction.c index 0308372c..04ea5f2b 100644 --- a/tests/test_extraction.c +++ b/tests/test_extraction.c @@ -1787,6 +1787,31 @@ TEST(wolfram_caller_attribution) { PASS(); } +/* Issue #438: a C function_definition has no `name` field — the name lives in the + * declarator chain. Calls inside a C function must be attributed to the enclosing + * function, not the module. Pre-fix, enclosing_func_qn fell back to the module QN. */ +TEST(c_caller_attribution) { + CBMFileResult *r = extract("int helper(int x) { return x; }\n" + "int caller(void) { return helper(1); }\n", + CBM_LANG_C, "t", "main.c"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT_GT(r->calls.count, 0); + int saw_helper = 0; + for (int i = 0; i < r->calls.count; i++) { + if (strcmp(r->calls.items[i].callee_name, "helper") == 0) { + saw_helper = 1; + /* enclosing_func_qn must be the function, NOT empty and NOT the module QN. */ + ASSERT_NOT_NULL(r->calls.items[i].enclosing_func_qn); + ASSERT_FALSE(strcmp(r->calls.items[i].enclosing_func_qn, "") == 0); + ASSERT_FALSE(strcmp(r->calls.items[i].enclosing_func_qn, "t.main") == 0); + } + } + ASSERT(saw_helper); + cbm_free_result(r); + PASS(); +} + /* --- Wolfram parse (simple assignment) --- */ TEST(wolfram_parse) { CBMFileResult *r = extract("x = 42;\ny = x + 1;\n", CBM_LANG_WOLFRAM, "t", "simple.wl"); @@ -2963,6 +2988,7 @@ SUITE(extraction) { RUN_TEST(wolfram_function_extended); RUN_TEST(wolfram_call); RUN_TEST(wolfram_caller_attribution); + RUN_TEST(c_caller_attribution); RUN_TEST(wolfram_parse); RUN_TEST(wolfram_import); RUN_TEST(wolfram_nested_def);