Skip to content

Commit f71fded

Browse files
DeusData and Koolerx
authored and committed
Cherry-pick extraction and Cypher improvements from PR #162
- Enum member extraction: C#/Java/TS/C++ enum values as Variable nodes
- JS/TS destructured variable splitting: const { A, B, C } = require() emits individual Variable nodes instead of one blob
- Cypher -- SQL-style single-line comments
- Cypher in_degree/out_degree virtual node properties (thread-safe via binding store pointer, enables dead code detection via Cypher)
- Cypher regex matching in inline property filters
- Resolution tiebreaking: test/mock QN deprioritization and C# field type hint resolution for obj.Method() disambiguation

Co-Authored-By: Tommy <Koolerx@users.noreply.github.com>
1 parent 68dd64c commit f71fded

File tree

4 files changed

+263
-35
lines changed

4 files changed

+263
-35
lines changed

internal/cbm/extract_defs.c

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,8 @@ static void extract_class_methods(CBMExtractCtx *ctx, TSNode class_node, const c
180180
const CBMLangSpec *spec);
181181
static void extract_class_fields(CBMExtractCtx *ctx, TSNode class_node, const char *class_qn,
182182
const CBMLangSpec *spec);
183+
static TSNode find_class_body(TSNode class_node, CBMLanguage lang);
184+
static void extract_enum_members(CBMExtractCtx *ctx, TSNode node, const char *class_qn);
183185
static void extract_elixir_call(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec);
184186

185187
// --- Helpers ---
@@ -1864,6 +1866,10 @@ static void extract_class_def(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec
18641866

18651867
cbm_defs_push(&ctx->result->defs, a, def);
18661868

1869+
if (strcmp(label, "Enum") == 0) {
1870+
extract_enum_members(ctx, node, class_qn);
1871+
}
1872+
18671873
// Extract methods inside the class
18681874
extract_class_methods(ctx, node, class_qn, spec);
18691875

@@ -2426,6 +2432,80 @@ static void extract_csharp_vars(CBMExtractCtx *ctx, TSNode node, CBMArena *a) {
24262432
}
24272433
}
24282434

2435+
/* Report whether `kind` (a tree-sitter node type string) is one of the
 * node types this file treats as an enum member declaration.
 * `kind` must be a non-NULL, NUL-terminated string. */
static bool is_enum_member_kind(const char *kind) {
    static const char *const member_kinds[] = {
        "enum_member_declaration", "enum_constant", "enum_member",
        "enum_assignment",         "enumerator",
    };
    for (size_t idx = 0; idx < sizeof(member_kinds) / sizeof(member_kinds[0]); idx++) {
        if (strcmp(kind, member_kinds[idx]) == 0) {
            return true;
        }
    }
    return false;
}
2441+
2442+
/* Extract enum members as Variable nodes (C#, Java, TypeScript, C++). */
2443+
static void extract_enum_members(CBMExtractCtx *ctx, TSNode node, const char *class_qn) {
2444+
CBMArena *a = ctx->arena;
2445+
TSNode body = find_class_body(node, ctx->language);
2446+
if (ts_node_is_null(body)) {
2447+
return;
2448+
}
2449+
uint32_t mc = ts_node_named_child_count(body);
2450+
for (uint32_t mi = 0; mi < mc; mi++) {
2451+
TSNode member = ts_node_named_child(body, mi);
2452+
if (!is_enum_member_kind(ts_node_type(member))) {
2453+
continue;
2454+
}
2455+
TSNode mname = ts_node_child_by_field_name(member, TS_FIELD("name"));
2456+
if (ts_node_is_null(mname)) {
2457+
mname = cbm_find_child_by_kind(member, "identifier");
2458+
}
2459+
if (ts_node_is_null(mname)) {
2460+
continue;
2461+
}
2462+
char *member_name = cbm_node_text(a, mname, ctx->source);
2463+
if (!member_name || !member_name[0]) {
2464+
continue;
2465+
}
2466+
CBMDefinition mdef;
2467+
memset(&mdef, 0, sizeof(mdef));
2468+
mdef.name = member_name;
2469+
mdef.qualified_name = cbm_arena_sprintf(a, "%s.%s", class_qn, member_name);
2470+
mdef.label = "Variable";
2471+
mdef.file_path = ctx->rel_path;
2472+
mdef.start_line = ts_node_start_point(member).row + TS_LINE_OFFSET;
2473+
mdef.end_line = ts_node_end_point(member).row + TS_LINE_OFFSET;
2474+
cbm_defs_push(&ctx->result->defs, a, mdef);
2475+
}
2476+
}
2477+
2478+
/* Resolve the identifier node from a destructure pattern child.
2479+
* pair_pattern → value field; shorthand/identifier → itself; others → first named child. */
2480+
static TSNode destructure_ident(TSNode pat_child) {
2481+
const char *pk = ts_node_type(pat_child);
2482+
if (strcmp(pk, "shorthand_property_identifier_pattern") == 0 || strcmp(pk, "identifier") == 0) {
2483+
return pat_child;
2484+
}
2485+
if (strcmp(pk, "pair_pattern") == 0) {
2486+
return ts_node_child_by_field_name(pat_child, TS_FIELD("value"));
2487+
}
2488+
/* rest_pattern, assignment_pattern, etc. — first named child. */
2489+
return ts_node_named_child(pat_child, 0);
2490+
}
2491+
2492+
/* Emit individual Variable nodes for each destructured binding. */
2493+
static void extract_destructured_vars(CBMExtractCtx *ctx, TSNode pattern, TSNode decl,
2494+
CBMArena *a) {
2495+
uint32_t pc = ts_node_named_child_count(pattern);
2496+
for (uint32_t pi = 0; pi < pc; pi++) {
2497+
TSNode pat_child = ts_node_named_child(pattern, pi);
2498+
TSNode ident = destructure_ident(pat_child);
2499+
if (ts_node_is_null(ident)) {
2500+
continue;
2501+
}
2502+
char *id_text = cbm_node_text(a, ident, ctx->source);
2503+
if (id_text && id_text[0]) {
2504+
push_var_def(ctx, id_text, decl);
2505+
}
2506+
}
2507+
}
2508+
24292509
// JS/TS variable extraction: skip function-assigned declarators.
24302510
static void extract_js_vars(CBMExtractCtx *ctx, TSNode node, CBMArena *a) {
24312511
uint32_t n = ts_node_named_child_count(node);
@@ -2444,7 +2524,14 @@ static void extract_js_vars(CBMExtractCtx *ctx, TSNode node, CBMArena *a) {
24442524
}
24452525
TSNode vname = ts_node_child_by_field_name(child, TS_FIELD("name"));
24462526
if (!ts_node_is_null(vname)) {
2447-
push_var_def(ctx, cbm_node_text(a, vname, ctx->source), child);
2527+
const char *nk = ts_node_type(vname);
2528+
/* Destructured patterns: emit individual identifiers instead of
2529+
* the raw "{A, B, C}" text as a single Variable node. */
2530+
if (strcmp(nk, "object_pattern") == 0 || strcmp(nk, "array_pattern") == 0) {
2531+
extract_destructured_vars(ctx, vname, child, a);
2532+
} else {
2533+
push_var_def(ctx, cbm_node_text(a, vname, ctx->source), child);
2534+
}
24482535
}
24492536
}
24502537
}

src/cypher/cypher.c

Lines changed: 85 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ enum {
3535

3636
#include <ctype.h>
3737
#include "foundation/compat_regex.h"
38+
#include <stddef.h>
3839
#include <stdint.h> // int64_t
3940
#include <stdio.h>
4041
#include <stdlib.h>
@@ -326,6 +327,13 @@ static bool lex_skip_whitespace_comments(const char *input, int len, int *i) {
326327
}
327328
return true;
328329
}
330+
/* SQL-style -- single-line comment */
331+
if (*i + SKIP_ONE < len && input[*i] == '-' && input[*i + SKIP_ONE] == '-') {
332+
while (*i < len && input[*i] != '\n') {
333+
(*i)++;
334+
}
335+
return true;
336+
}
329337
if (*i + SKIP_ONE < len && input[*i] == '/' && input[*i + SKIP_ONE] == '*') {
330338
*i += PAIR_LEN;
331339
while (*i + SKIP_ONE < len && !(input[*i] == '*' && input[*i + SKIP_ONE] == '/')) {
@@ -1639,35 +1647,58 @@ typedef struct {
16391647
const char *edge_var_names[CYP_MAX_EDGE_VARS]; /* variable names (edges) */
16401648
cbm_edge_t edge_vars[CYP_MAX_EDGE_VARS]; /* edge data */
16411649
int edge_var_count;
1650+
cbm_store_t *store; /* for computing in_degree/out_degree on demand */
16421651
} binding_t;
16431652

1644-
/* Get node property by name */
1645-
static const char *node_prop(const cbm_node_t *n, const char *prop) {
1653+
/* Return a string field from a node by property name. NULL-safe. */
1654+
static const char *node_string_field(const cbm_node_t *n, const char *prop) {
1655+
static const struct {
1656+
const char *key;
1657+
size_t offset;
1658+
} fields[] = {
1659+
{"name", offsetof(cbm_node_t, name)},
1660+
{"qualified_name", offsetof(cbm_node_t, qualified_name)},
1661+
{"label", offsetof(cbm_node_t, label)},
1662+
{"file_path", offsetof(cbm_node_t, file_path)},
1663+
};
1664+
for (size_t i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
1665+
if (strcmp(prop, fields[i].key) == 0) {
1666+
const char *val = *(const char **)((const char *)n + fields[i].offset);
1667+
return val ? val : "";
1668+
}
1669+
}
1670+
return NULL;
1671+
}
1672+
1673+
/* Get node property by name.
1674+
* store may be NULL; only needed for virtual degree properties. */
1675+
static const char *node_prop(const cbm_node_t *n, const char *prop, cbm_store_t *store) {
16461676
if (!n || !prop) {
16471677
return "";
16481678
}
1649-
if (strcmp(prop, "name") == 0) {
1650-
return n->name ? n->name : "";
1651-
}
1652-
if (strcmp(prop, "qualified_name") == 0) {
1653-
return n->qualified_name ? n->qualified_name : "";
1654-
}
1655-
if (strcmp(prop, "label") == 0) {
1656-
return n->label ? n->label : "";
1657-
}
1658-
if (strcmp(prop, "file_path") == 0) {
1659-
return n->file_path ? n->file_path : "";
1679+
const char *str = node_string_field(n, prop);
1680+
if (str) {
1681+
return str;
16601682
}
1683+
/* Integer properties returned as strings. */
1684+
static _Thread_local char int_buf[CBM_SZ_32];
16611685
if (strcmp(prop, "start_line") == 0) {
1662-
/* Return as string */
1663-
static char buf[CBM_SZ_32];
1664-
snprintf(buf, sizeof(buf), "%d", n->start_line);
1665-
return buf;
1686+
snprintf(int_buf, sizeof(int_buf), "%d", n->start_line);
1687+
return int_buf;
16661688
}
16671689
if (strcmp(prop, "end_line") == 0) {
1668-
static char buf[CBM_SZ_32];
1669-
snprintf(buf, sizeof(buf), "%d", n->end_line);
1670-
return buf;
1690+
snprintf(int_buf, sizeof(int_buf), "%d", n->end_line);
1691+
return int_buf;
1692+
}
1693+
/* Virtual computed properties: in_degree/out_degree via CALLS edges.
1694+
* Enables Cypher dead-code detection: WHERE n.in_degree = '0'. */
1695+
if (store && (strcmp(prop, "in_degree") == 0 || strcmp(prop, "out_degree") == 0)) {
1696+
int in_deg = 0;
1697+
int out_deg = 0;
1698+
cbm_store_node_degree(store, n->id, &in_deg, &out_deg);
1699+
int val = (strcmp(prop, "in_degree") == 0) ? in_deg : out_deg;
1700+
snprintf(int_buf, sizeof(int_buf), "%d", val);
1701+
return int_buf;
16711702
}
16721703
return "";
16731704
}
@@ -1827,6 +1858,7 @@ static void binding_copy(binding_t *dst, const binding_t *src) {
18271858
dst->edge_var_names[i] = src->edge_var_names[i]; /* AST-owned */
18281859
edge_deep_copy(&dst->edge_vars[i], &src->edge_vars[i]);
18291860
}
1861+
dst->store = src->store;
18301862
}
18311863

18321864
/* Deep-copy a node into a binding (binding owns the strings) */
@@ -1858,7 +1890,7 @@ static const char *resolve_condition_value(const cbm_condition_t *c, binding_t *
18581890
return NULL; /* unbound variable */
18591891
}
18601892
if (c->property) {
1861-
return node_prop(n, c->property);
1893+
return node_prop(n, c->property, b->store);
18621894
}
18631895
/* Bare alias (e.g. post-WITH virtual var) — use node name directly */
18641896
return n->name ? n->name : "";
@@ -1991,11 +2023,34 @@ static bool eval_where(const cbm_where_clause_t *w, binding_t *b) {
19912023
return is_and;
19922024
}
19932025

1994-
/* Check inline property filters */
1995-
static bool check_inline_props(const cbm_node_t *n, const cbm_prop_filter_t *props, int count) {
2026+
/* Heuristic: does `s` look like a regex pattern rather than a literal?
 * True when it contains the sequence ".*" or ".+", or any of the single
 * characters '[', '(', '|', '^', '$'. NULL is never a regex. */
static bool looks_like_regex(const char *s) {
    if (s == NULL) {
        return false;
    }
    /* Quantified wildcard sequences. */
    if (strstr(s, ".*") != NULL || strstr(s, ".+") != NULL) {
        return true;
    }
    /* Any single metacharacter from the recognised set. */
    return strpbrk(s, "[(|^$") != NULL;
}
2034+
2035+
/* Check inline property filters.
2036+
* Values that look like regex patterns are matched with POSIX ERE;
2037+
* plain values use exact strcmp. */
2038+
static bool check_inline_props(const cbm_node_t *n, const cbm_prop_filter_t *props, int count,
2039+
cbm_store_t *store) {
19962040
for (int i = 0; i < count; i++) {
1997-
const char *actual = node_prop(n, props[i].key);
1998-
if (strcmp(actual, props[i].value) != 0) {
2041+
const char *actual = node_prop(n, props[i].key, store);
2042+
if (looks_like_regex(props[i].value)) {
2043+
cbm_regex_t re;
2044+
if (cbm_regcomp(&re, props[i].value, CBM_REG_EXTENDED | CBM_REG_NOSUB) == 0) {
2045+
bool matched = cbm_regexec(&re, actual, 0, NULL, 0) == 0;
2046+
cbm_regfree(&re);
2047+
if (!matched) {
2048+
return false;
2049+
}
2050+
} else if (strcmp(actual, props[i].value) != 0) {
2051+
return false;
2052+
}
2053+
} else if (strcmp(actual, props[i].value) != 0) {
19992054
return false;
20002055
}
20012056
}
@@ -2068,7 +2123,7 @@ static const char *binding_get_virtual(binding_t *b, const char *var, const char
20682123
cbm_node_t *n = binding_get(b, var);
20692124
if (n) {
20702125
if (prop) {
2071-
return node_prop(n, prop);
2126+
return node_prop(n, prop, b->store);
20722127
}
20732128
return n->name ? n->name : "";
20742129
}
@@ -2147,7 +2202,7 @@ static void scan_pattern_nodes(cbm_store_t *store, const char *project, int max_
21472202
if (first->prop_count > 0) {
21482203
int kept = 0;
21492204
for (int i = 0; i < *out_count; i++) {
2150-
if (check_inline_props(&(*out_nodes)[i], first->props, first->prop_count)) {
2205+
if (check_inline_props(&(*out_nodes)[i], first->props, first->prop_count, store)) {
21512206
if (kept != i) {
21522207
(*out_nodes)[kept] = (*out_nodes)[i];
21532208
}
@@ -2178,7 +2233,7 @@ static void process_edges(cbm_store_t *store, cbm_edge_t *edges, int edge_count,
21782233
node_fields_free(&found);
21792234
continue;
21802235
}
2181-
if (!check_inline_props(&found, target_node->props, target_node->prop_count)) {
2236+
if (!check_inline_props(&found, target_node->props, target_node->prop_count, store)) {
21822237
node_fields_free(&found);
21832238
continue;
21842239
}
@@ -2211,7 +2266,7 @@ static void expand_var_length(cbm_store_t *store, cbm_rel_pattern_t *rel,
22112266
if (target_node->label && strcmp(hop->node.label, target_node->label) != 0) {
22122267
continue;
22132268
}
2214-
if (!check_inline_props(&hop->node, target_node->props, target_node->prop_count)) {
2269+
if (!check_inline_props(&hop->node, target_node->props, target_node->prop_count, store)) {
22152270
continue;
22162271
}
22172272
binding_t nb = {0};
@@ -3287,6 +3342,7 @@ static int execute_single(cbm_store_t *store, cbm_query_t *q, const char *projec
32873342

32883343
for (int i = 0; i < scan_count && bind_count < bind_cap; i++) {
32893344
binding_t b = {0};
3345+
b.store = store;
32903346
binding_set(&b, var_name, &scanned[i]);
32913347
bool pass = !q->where || eval_where(q->where, &b);
32923348
if (pass) {

src/pipeline/pass_parallel.c

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ enum {
2626
#define PP_NSEC_PER_SEC 1000000000ULL
2727
#define PP_USEC_PER_MS 1000000ULL
2828
#define PP_HALF_CONF 0.5
29+
#define PP_FIELD_HINT_CONF 0.85
30+
enum { PP_CSHARP_M_PREFIX_LEN = 2 };
2931
#include "pipeline/pipeline.h"
3032
#include "pipeline/pipeline_internal.h"
3133
#include "pipeline/worker_pool.h"
@@ -1193,6 +1195,60 @@ static const cbm_gbuf_node_t *find_source_node(const cbm_gbuf_t *gbuf, const cha
11931195
return src;
11941196
}
11951197

1198+
/* Field type hint resolution for obj.Method() with multiple candidates.
1199+
* Strips C# field prefixes (_ / m_), capitalizes to get type name, and
1200+
* checks if TypeName.Method or ITypeName.Method exists among candidates. */
1201+
static void try_field_type_hint(resolve_ctx_t *rc, cbm_resolution_t *res, const char *callee_name,
1202+
int64_t source_id) {
1203+
if (!res->qualified_name || res->candidate_count <= SKIP_ONE) {
1204+
return;
1205+
}
1206+
const char *dot = strchr(callee_name, '.');
1207+
if (!dot) {
1208+
return;
1209+
}
1210+
size_t plen = (size_t)(dot - callee_name);
1211+
char obj_name[CBM_SZ_256];
1212+
if (plen >= sizeof(obj_name)) {
1213+
return;
1214+
}
1215+
memcpy(obj_name, callee_name, plen);
1216+
obj_name[plen] = '\0';
1217+
1218+
const char *type_hint = obj_name;
1219+
if (type_hint[0] == '_') {
1220+
type_hint++;
1221+
}
1222+
if (type_hint[0] == 'm' && type_hint[SKIP_ONE] == '_') {
1223+
type_hint += PP_CSHARP_M_PREFIX_LEN;
1224+
}
1225+
1226+
char type_name[CBM_SZ_256];
1227+
snprintf(type_name, sizeof(type_name), "%s", type_hint);
1228+
if (type_name[0] >= 'a' && type_name[0] <= 'z') {
1229+
type_name[0] -= ('a' - 'A');
1230+
}
1231+
1232+
char iface_name[CBM_SZ_256];
1233+
snprintf(iface_name, sizeof(iface_name), "I%s", type_name);
1234+
1235+
const char *method = dot + SKIP_ONE;
1236+
const char **cands = NULL;
1237+
int cand_count = 0;
1238+
cbm_registry_find_by_name(rc->registry, method, &cands, &cand_count);
1239+
for (int ci = 0; ci < cand_count; ci++) {
1240+
if (strstr(cands[ci], type_name) || strstr(cands[ci], iface_name)) {
1241+
const cbm_gbuf_node_t *better = cbm_gbuf_find_by_qn(rc->main_gbuf, cands[ci]);
1242+
if (better && better->id != source_id) {
1243+
res->qualified_name = cands[ci];
1244+
res->confidence = PP_FIELD_HINT_CONF;
1245+
res->strategy = "field_type_hint";
1246+
return;
1247+
}
1248+
}
1249+
}
1250+
}
1251+
11961252
/* Resolve calls for one file and emit CALLS/HTTP_CALLS/ASYNC_CALLS edges. */
11971253
static void resolve_file_calls(resolve_ctx_t *rc, resolve_worker_state_t *ws, CBMFileResult *result,
11981254
const char *rel, const char *module_qn, const char **imp_keys,
@@ -1210,6 +1266,9 @@ static void resolve_file_calls(resolve_ctx_t *rc, resolve_worker_state_t *ws, CB
12101266

12111267
cbm_resolution_t res = cbm_registry_resolve(rc->registry, call->callee_name, module_qn,
12121268
imp_keys, imp_vals, imp_count);
1269+
1270+
try_field_type_hint(rc, &res, call->callee_name, source_node->id);
1271+
12131272
if (!res.qualified_name || res.qualified_name[0] == '\0') {
12141273
if (cbm_service_pattern_route_method(call->callee_name) != NULL) {
12151274
cbm_resolution_t fake_res = {.qualified_name = call->callee_name,

0 commit comments

Comments
 (0)