Skip to content

Commit 22408ed

Browse files
author
Your Name
committed
feat(pipeline): JS/TS constant resolution for Socket.IO channel detection
Two-pass channel extraction for JavaScript/TypeScript:

Pass 1 (existing): a regex matches string-literal channels: socket.on('Name', ...)
Pass 2 (new): resolves constant-name channels: socket.on(CONSTANT_NAME, ...)
- Collects const NAME = 'value' mappings from the full file source
- Matches .emit/.on/.once with bare SCREAMING_CASE identifiers
- Resolves constants to their string values
- Handles method chaining (.on() without an explicit receiver)
- Filters short names (<3 chars) to avoid false positives

A file-level pass in store.c reads complete JS/TS files (up to 512KB) for constant resolution, since per-node snippets don't include file-scope constants.

Result: the JS service went from 6 channels (test tool only) to 17 channels, including all production Socket.IO events: WebRtcSdp, WebRtcIce, CaptureNodeStatusUpdate, RecordedFileUpdate, RecordingSessionUpdate, etc.
1 parent 80772e3 commit 22408ed

2 files changed

Lines changed: 206 additions & 0 deletions

File tree

src/pipeline/httplink.c

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1996,6 +1996,145 @@ int cbm_extract_channels(const char *source, cbm_channel_match_t *out, int max_o
19961996
return count;
19971997
}
19981998

1999+
/* ── JS/TS channel extraction: constant resolution pass ─────────── */
2000+
2001+
/* Second pass for JS/TS: resolves .emit(CONSTANT) and .on(CONSTANT) where
2002+
* the channel name is a JS constant instead of a string literal.
2003+
* Pattern: socket.on(SOME_CONSTANT, handler) / this.emit(EVENT_NAME, data)
2004+
* Resolves via: const SOME_CONSTANT = 'ActualChannelName'; */
2005+
int cbm_extract_js_channels_constants(const char *source, cbm_channel_match_t *out, int max_out) {
2006+
if (!source || !*source) return 0;
2007+
2008+
/* Pass 1: collect const NAME = 'value' and const NAME = "value" mappings */
2009+
typedef struct { char name[128]; char value[256]; } js_const_t;
2010+
js_const_t consts[256];
2011+
int nconsts = 0;
2012+
2013+
cbm_regex_t const_re;
2014+
if (cbm_regcomp(&const_re,
2015+
"const[[:space:]]+([A-Z_][A-Z0-9_]*)[[:space:]]*=[[:space:]]*['\"]([^'\"]{1,128})['\"]",
2016+
CBM_REG_EXTENDED) != 0) {
2017+
return 0;
2018+
}
2019+
2020+
const char *p = source;
2021+
cbm_regmatch_t cm[3];
2022+
while (nconsts < 256 && cbm_regexec(&const_re, p, 3, cm, 0) == 0) {
2023+
int nlen = cm[1].rm_eo - cm[1].rm_so;
2024+
int vlen = cm[2].rm_eo - cm[2].rm_so;
2025+
if (nlen < (int)sizeof(consts[0].name) && vlen < (int)sizeof(consts[0].value)) {
2026+
memcpy(consts[nconsts].name, p + cm[1].rm_so, (size_t)nlen);
2027+
consts[nconsts].name[nlen] = '\0';
2028+
memcpy(consts[nconsts].value, p + cm[2].rm_so, (size_t)vlen);
2029+
consts[nconsts].value[vlen] = '\0';
2030+
nconsts++;
2031+
}
2032+
p += cm[0].rm_eo;
2033+
}
2034+
cbm_regfree(&const_re);
2035+
2036+
if (nconsts == 0) return 0;
2037+
2038+
/* Pass 2: find .emit(CONSTANT) and .on(CONSTANT) with bare identifiers */
2039+
static const char *channel_receivers[] = {
2040+
"socket", "io", "client", "server", "connection",
2041+
"emitter", "eventEmitter", "eventBus", "this",
2042+
"socketIoEventEmitter", "socketServer", "nsp", NULL
2043+
};
2044+
2045+
/* Match both receiver.on(CONSTANT) and chained .on(CONSTANT) patterns.
2046+
* The chained pattern starts with optional whitespace + dot. */
2047+
cbm_regex_t call_re;
2048+
if (cbm_regcomp(&call_re,
2049+
"([a-zA-Z_][a-zA-Z0-9_]*)?\\.("
2050+
"emit|on|once|addListener|onRequest|respond"
2051+
")\\([[:space:]]*([A-Z_][A-Z0-9_]*)",
2052+
CBM_REG_EXTENDED) != 0) {
2053+
return 0;
2054+
}
2055+
2056+
int count = 0;
2057+
p = source;
2058+
cbm_regmatch_t mm[4];
2059+
while (count < max_out && cbm_regexec(&call_re, p, 4, mm, 0) == 0) {
2060+
int rlen = mm[1].rm_eo - mm[1].rm_so;
2061+
char receiver[64];
2062+
bool is_chained = (rlen <= 0); /* method chaining: no receiver captured */
2063+
if (rlen > 0) {
2064+
if (rlen >= (int)sizeof(receiver)) rlen = (int)sizeof(receiver) - 1;
2065+
memcpy(receiver, p + mm[1].rm_so, (size_t)rlen);
2066+
receiver[rlen] = '\0';
2067+
} else {
2068+
receiver[0] = '\0';
2069+
}
2070+
2071+
bool is_channel = is_chained; /* chained .on() assumed to be on socket object */
2072+
if (!is_chained) {
2073+
for (int i = 0; channel_receivers[i]; i++) {
2074+
if (strcasecmp(receiver, channel_receivers[i]) == 0) {
2075+
is_channel = true;
2076+
break;
2077+
}
2078+
}
2079+
}
2080+
2081+
if (is_channel) {
2082+
int mlen = mm[2].rm_eo - mm[2].rm_so;
2083+
char method[32];
2084+
if (mlen >= (int)sizeof(method)) mlen = (int)sizeof(method) - 1;
2085+
memcpy(method, p + mm[2].rm_so, (size_t)mlen);
2086+
method[mlen] = '\0';
2087+
2088+
int clen = mm[3].rm_eo - mm[3].rm_so;
2089+
char constant_name[128];
2090+
if (clen >= (int)sizeof(constant_name)) clen = (int)sizeof(constant_name) - 1;
2091+
memcpy(constant_name, p + mm[3].rm_so, (size_t)clen);
2092+
constant_name[clen] = '\0';
2093+
2094+
/* Resolve constant to string value */
2095+
const char *resolved = NULL;
2096+
for (int c = 0; c < nconsts; c++) {
2097+
if (strcmp(consts[c].name, constant_name) == 0) {
2098+
resolved = consts[c].value;
2099+
break;
2100+
}
2101+
}
2102+
2103+
if (resolved) {
2104+
strncpy(out[count].channel, resolved, sizeof(out[count].channel) - 1);
2105+
out[count].channel[sizeof(out[count].channel) - 1] = '\0';
2106+
} else {
2107+
/* Unresolved constant — use the constant name as channel name */
2108+
strncpy(out[count].channel, constant_name, sizeof(out[count].channel) - 1);
2109+
out[count].channel[sizeof(out[count].channel) - 1] = '\0';
2110+
}
2111+
2112+
/* Skip generic events */
2113+
const char *ch = out[count].channel;
2114+
if (strcmp(ch, "error") != 0 && strcmp(ch, "close") != 0 &&
2115+
strcmp(ch, "end") != 0 && strcmp(ch, "data") != 0 &&
2116+
strcmp(ch, "connect") != 0 && strcmp(ch, "disconnect") != 0 &&
2117+
strcmp(ch, "connection") != 0 && strcmp(ch, "message") != 0) {
2118+
if (strcmp(method, "emit") == 0 || strcmp(method, "respond") == 0) {
2119+
strncpy(out[count].direction, "emit", sizeof(out[count].direction) - 1);
2120+
} else {
2121+
strncpy(out[count].direction, "listen", sizeof(out[count].direction) - 1);
2122+
}
2123+
if (strcasecmp(receiver, "socket") == 0 || strcasecmp(receiver, "io") == 0 ||
2124+
strcasecmp(receiver, "nsp") == 0 || strcasecmp(receiver, "socketServer") == 0) {
2125+
strncpy(out[count].transport, "socketio", sizeof(out[count].transport) - 1);
2126+
} else {
2127+
strncpy(out[count].transport, "eventemitter", sizeof(out[count].transport) - 1);
2128+
}
2129+
count++;
2130+
}
2131+
}
2132+
p += mm[0].rm_eo;
2133+
}
2134+
cbm_regfree(&call_re);
2135+
return count;
2136+
}
2137+
19992138
/* ── C# channel extraction: Socket.IO with constant resolution ─── */
20002139

20012140
/* Extract channels from C# source that uses constant names for event strings.

src/store/store.c

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5047,6 +5047,7 @@ typedef struct {
50475047
} cbm_channel_match_t;
50485048
int cbm_extract_channels(const char *source, cbm_channel_match_t *out, int max_out);
50495049
int cbm_extract_csharp_channels(const char *source, cbm_channel_match_t *out, int max_out);
5050+
/* JS/TS constant-resolution extractor: scans `source` and writes up to `max_out`
 * matches into `out`; returns the number written. */
int cbm_extract_js_channels_constants(const char *source, cbm_channel_match_t *out, int max_out);
50505051

50515052
int cbm_store_detect_channels(cbm_store_t *s, const char *project, const char *repo_path) {
50525053
if (!s || !s->db || !project || !repo_path) return 0;
@@ -5147,6 +5148,72 @@ int cbm_store_detect_channels(cbm_store_t *s, const char *project, const char *r
51475148

51485149
exec_sql(s, "COMMIT");
51495150
sqlite3_finalize(stmt);
5151+
5152+
/* Second pass: JS/TS constant resolution on full files.
5153+
* The per-node pass above only sees function bodies — constants defined at file
5154+
* scope are invisible. This pass reads complete JS/TS files that contain Socket.IO
5155+
* patterns and resolves constant channel names. */
5156+
{
5157+
const char *file_sql =
5158+
"SELECT DISTINCT file_path FROM nodes WHERE project = ?1 "
5159+
"AND (file_path LIKE '%.js' OR file_path LIKE '%.ts' OR file_path LIKE '%.tsx') "
5160+
"AND label NOT IN ('File','Folder','Project')";
5161+
sqlite3_stmt *fst = NULL;
5162+
sqlite3_prepare_v2(s->db, file_sql, -1, &fst, NULL);
5163+
if (fst) {
5164+
bind_text(fst, 1, project);
5165+
exec_sql(s, "BEGIN TRANSACTION");
5166+
5167+
/* Re-prepare insert for this transaction */
5168+
sqlite3_stmt *ins2 = NULL;
5169+
sqlite3_prepare_v2(s->db,
5170+
"INSERT OR IGNORE INTO channels"
5171+
"(project,channel_name,direction,transport,node_id,file_path,function_name) "
5172+
"VALUES(?1,?2,?3,?4,0,?5,'(file-level)')", -1, &ins2, NULL);
5173+
5174+
while (sqlite3_step(fst) == SQLITE_ROW) {
5175+
const char *fpath = (const char *)sqlite3_column_text(fst, 0);
5176+
if (!fpath) continue;
5177+
5178+
char full_path[2048];
5179+
snprintf(full_path, sizeof(full_path), "%s/%s", repo_path, fpath);
5180+
5181+
FILE *f = fopen(full_path, "r");
5182+
if (!f) continue;
5183+
5184+
/* Read entire file */
5185+
fseek(f, 0, SEEK_END);
5186+
long fsize = ftell(f);
5187+
fseek(f, 0, SEEK_SET);
5188+
if (fsize <= 0 || fsize > 512 * 1024) { fclose(f); continue; } /* skip huge files */
5189+
char *full_source = malloc((size_t)fsize + 1);
5190+
size_t nread = fread(full_source, 1, (size_t)fsize, f);
5191+
full_source[nread] = '\0';
5192+
fclose(f);
5193+
5194+
cbm_channel_match_t matches[64];
5195+
int mc = cbm_extract_js_channels_constants(full_source, matches, 64);
5196+
5197+
for (int i = 0; i < mc && ins2; i++) {
5198+
/* Filter out short constant names (single-letter variables) */
5199+
if (strlen(matches[i].channel) < 3) continue;
5200+
sqlite3_reset(ins2);
5201+
bind_text(ins2, 1, project);
5202+
bind_text(ins2, 2, matches[i].channel);
5203+
bind_text(ins2, 3, matches[i].direction);
5204+
bind_text(ins2, 4, matches[i].transport);
5205+
bind_text(ins2, 5, fpath);
5206+
sqlite3_step(ins2);
5207+
total++;
5208+
}
5209+
free(full_source);
5210+
}
5211+
exec_sql(s, "COMMIT");
5212+
sqlite3_finalize(fst);
5213+
if (ins2) sqlite3_finalize(ins2);
5214+
}
5215+
}
5216+
51505217
if (ins) sqlite3_finalize(ins);
51515218
return total;
51525219
}

0 commit comments

Comments
 (0)