@@ -631,6 +631,39 @@ static void expr_free(cbm_expr_t *e) {
631631 // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion)
632632 free (e -> cond .in_values );
633633 }
634+ if (e -> type == EXPR_NOT_EXISTS ) {
635+ if (e -> sub_pattern ) {
636+ /* Free pattern nodes and rels */
637+ for (int i = 0 ; i < e -> sub_pattern -> node_count ; i ++ ) {
638+ free ((void * )e -> sub_pattern -> nodes [i ].variable );
639+ free ((void * )e -> sub_pattern -> nodes [i ].label );
640+ }
641+ for (int i = 0 ; i < e -> sub_pattern -> rel_count ; i ++ ) {
642+ free ((void * )e -> sub_pattern -> rels [i ].variable );
643+ for (int t = 0 ; t < e -> sub_pattern -> rels [i ].type_count ; t ++ ) {
644+ free ((void * )e -> sub_pattern -> rels [i ].types [t ]);
645+ }
646+ free (e -> sub_pattern -> rels [i ].types );
647+ free ((void * )e -> sub_pattern -> rels [i ].direction );
648+ }
649+ free (e -> sub_pattern -> nodes );
650+ free (e -> sub_pattern -> rels );
651+ free (e -> sub_pattern );
652+ }
653+ if (e -> sub_where ) {
654+ cbm_where_clause_t * sw = (cbm_where_clause_t * )e -> sub_where ;
655+ if (sw -> root ) expr_free (sw -> root );
656+ for (int i = 0 ; i < sw -> count ; i ++ ) {
657+ free ((void * )sw -> conditions [i ].variable );
658+ free ((void * )sw -> conditions [i ].property );
659+ free ((void * )sw -> conditions [i ].op );
660+ free ((void * )sw -> conditions [i ].value );
661+ }
662+ free (sw -> conditions );
663+ free ((void * )sw -> op );
664+ free (sw );
665+ }
666+ }
634667 expr_free (e -> left );
635668 expr_free (e -> right );
636669 free (e );
@@ -695,6 +728,8 @@ static const char *unsupported_clause_error(cbm_token_type_t type) {
695728
696729/* Forward declarations for recursive descent */
697730static cbm_expr_t * parse_or_expr (parser_t * p );
731+ static int parse_match_pattern (parser_t * p , cbm_pattern_t * pat );
732+ static int parse_where (parser_t * p , cbm_where_clause_t * * out );
698733
699734/* Parse a single condition: var.prop OP value | var.prop IS [NOT] NULL | var.prop IN [...] */
700735static cbm_expr_t * parse_condition_expr (parser_t * p ) {
@@ -833,9 +868,40 @@ static cbm_expr_t *parse_atom_expr(parser_t *p) {
833868 return parse_condition_expr (p );
834869}
835870
836- /* NOT: NOT atom | atom */
871+ /* NOT: NOT EXISTS { MATCH ... WHERE ... } | NOT atom | atom */
837872static cbm_expr_t * parse_not_expr (parser_t * p ) {
838873 if (match (p , TOK_NOT )) {
874+ /* NOT EXISTS { MATCH (pattern) WHERE ... } — correlated subquery */
875+ if (check (p , TOK_EXISTS )) {
876+ advance (p ); /* consume EXISTS */
877+ if (!expect (p , TOK_LBRACE )) return NULL ;
878+
879+ cbm_expr_t * e = calloc (1 , sizeof (cbm_expr_t ));
880+ e -> type = EXPR_NOT_EXISTS ;
881+
882+ /* Parse inner MATCH pattern */
883+ if (!expect (p , TOK_MATCH )) { free (e ); return NULL ; }
884+ e -> sub_pattern = calloc (1 , sizeof (cbm_pattern_t ));
885+ if (parse_match_pattern (p , e -> sub_pattern ) < 0 ) {
886+ free (e -> sub_pattern );
887+ free (e );
888+ return NULL ;
889+ }
890+
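891+ /* Optional inner WHERE. NOTE(review): parse_where's int result is discarded here, so if it signals a parse error for a malformed inner WHERE, that error goes unnoticed. */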
892+ cbm_where_clause_t * inner_where = NULL ;
893+ parse_where (p , & inner_where );
894+ e -> sub_where = inner_where ;
895+
896+ if (!expect (p , TOK_RBRACE )) {
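897+ /* Cleanup on parse failure. NOTE(review): the frees below are shallow — the nodes/rels owned by sub_pattern and the root/conditions/op owned by sub_where are leaked. expr_free() handles EXPR_NOT_EXISTS and releases all of them together with e. */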
898+ free (e -> sub_pattern );
899+ free (e -> sub_where );
900+ free (e );
901+ return NULL ;
902+ }
903+ return e ;
904+ }
839905 cbm_expr_t * child = parse_not_expr (p );
840906 return child ? expr_not (child ) : NULL ;
841907 }
@@ -1788,6 +1854,16 @@ static void binding_set(binding_t *b, const char *var, const cbm_node_t *node) {
17881854 b -> var_count ++ ;
17891855}
17901856
1857+ /* Forward declarations for NOT EXISTS subquery evaluation */
1858+ static void scan_pattern_nodes (cbm_store_t * store , const char * project , int max_rows ,
1859+ cbm_node_pattern_t * first , cbm_node_t * * out_nodes ,
1860+ int * out_count );
1861+ static void expand_pattern_rels (cbm_store_t * store , cbm_pattern_t * pat , binding_t * * bindings ,
1862+ int * bind_count , const int * bind_cap , const char * * var_name ,
1863+ bool is_optional );
1864+ static bool eval_where (const cbm_where_clause_t * w , binding_t * b , cbm_store_t * store ,
1865+ const char * project , int max_rows );
1866+
17911867/* Evaluate a WHERE condition against a binding */
17921868static bool eval_condition (const cbm_condition_t * c , binding_t * b ) {
17931869 const char * actual ;
@@ -1880,33 +1956,187 @@ static bool eval_condition(const cbm_condition_t *c, binding_t *b) {
18801956 return (int )(c -> negated ? !result : result );
18811957}
18821958
1883- /* Recursive expression tree evaluator */
1884- static bool eval_expr (const cbm_expr_t * e , binding_t * b ) {
1959+ /* Recursive expression tree evaluator.
1960+ * store is needed for EXPR_NOT_EXISTS (correlated subquery expansion). */
1961+ static bool eval_expr (const cbm_expr_t * e , binding_t * b , cbm_store_t * store ,
1962+ const char * project , int max_rows ) {
18851963 if (!e ) {
18861964 return true;
18871965 }
18881966 switch (e -> type ) {
18891967 case EXPR_CONDITION :
18901968 return eval_condition (& e -> cond , b );
18911969 case EXPR_AND :
1892- return (eval_expr (e -> left , b ) && eval_expr (e -> right , b )) != 0 ;
1970+ return (eval_expr (e -> left , b , store , project , max_rows ) &&
1971+ eval_expr (e -> right , b , store , project , max_rows )) != 0 ;
18931972 case EXPR_OR :
1894- return (eval_expr (e -> left , b ) || eval_expr (e -> right , b )) != 0 ;
1973+ return (eval_expr (e -> left , b , store , project , max_rows ) ||
1974+ eval_expr (e -> right , b , store , project , max_rows )) != 0 ;
18951975 case EXPR_NOT :
1896- return (!eval_expr (e -> left , b )) != 0 ;
1976+ return (!eval_expr (e -> left , b , store , project , max_rows )) != 0 ;
18971977 case EXPR_XOR :
1898- return eval_expr (e -> left , b ) != eval_expr (e -> right , b );
1978+ return eval_expr (e -> left , b , store , project , max_rows ) !=
1979+ eval_expr (e -> right , b , store , project , max_rows );
1980+ case EXPR_NOT_EXISTS : {
1981+ if (!e -> sub_pattern || !store ) return true;
1982+ cbm_pattern_t * sp = e -> sub_pattern ;
1983+
1984+ /* OPTIMIZATION: For the common pattern
1985+ * MATCH (n:Function) WHERE NOT EXISTS { MATCH (caller)-[e]->(n) WHERE e.type = 'CALLS' }
1986+ * we detect when the inner pattern's TARGET variable is already bound from
1987+ * the outer scope. Instead of scanning all possible callers, we directly
1988+ * query edges TO the bound node — O(1) per node instead of O(N). */
1989+ if (sp -> rel_count == 1 && sp -> node_count == 2 ) {
1990+ const char * start_var = sp -> nodes [0 ].variable ;
1991+ const char * end_var = sp -> nodes [1 ].variable ;
1992+ cbm_rel_pattern_t * rel = & sp -> rels [0 ];
1993+
1994+ /* Check which end is bound from outer scope */
1995+ cbm_node_t * bound_node = NULL ;
1996+ bool bound_is_target = false;
1997+ if (end_var && binding_get (b , end_var )) {
1998+ bound_node = binding_get (b , end_var );
1999+ bound_is_target = true;
2000+ } else if (start_var && binding_get (b , start_var )) {
2001+ bound_node = binding_get (b , start_var );
2002+ }
2003+
2004+ if (bound_node && bound_node -> id > 0 ) {
2005+ /* Fast path: query edges directly to/from the bound node */
2006+ cbm_edge_t * edges = NULL ;
2007+ int edge_count = 0 ;
2008+ bool found_match = false;
2009+
2010+ for (int ti = 0 ; ti < rel -> type_count && !found_match ; ti ++ ) {
2011+ const char * edge_type = rel -> types [ti ];
2012+ if (bound_is_target ) {
2013+ /* bound node is the target: look for edges incoming TO it */
2014+ cbm_store_find_edges_by_target_type (store , bound_node -> id ,
2015+ edge_type , & edges , & edge_count );
2016+ } else {
2017+ /* bound node is the source: look for edges outgoing FROM it */
2018+ cbm_store_find_edges_by_source_type (store , bound_node -> id ,
2019+ edge_type , & edges , & edge_count );
2020+ }
2021+ /* Apply inner WHERE filter if present */
2022+ cbm_where_clause_t * inner_w = (cbm_where_clause_t * )e -> sub_where ;
2023+ if (edge_count > 0 && inner_w ) {
2024+ /* Build a temporary binding with the edge to check WHERE conditions */
2025+ for (int ei = 0 ; ei < edge_count && !found_match ; ei ++ ) {
2026+ binding_t tmp = * b ; /* shallow copy of outer binding */
2027+ const char * edge_var = rel -> variable ;
2028+ if (edge_var ) {
2029+ binding_set_edge (& tmp , edge_var , & edges [ei ]);
2030+ }
2031+ if (eval_where (inner_w , & tmp , store , project , max_rows )) {
2032+ found_match = true;
2033+ }
2034+ }
2035+ } else if (edge_count > 0 ) {
2036+ found_match = true;
2037+ }
2038+ /* Free edges */
2039+ for (int ei = 0 ; ei < edge_count ; ei ++ ) {
2040+ free ((void * )edges [ei ].project );
2041+ free ((void * )edges [ei ].type );
2042+ free ((void * )edges [ei ].properties_json );
2043+ }
2044+ free (edges );
2045+ edges = NULL ;
2046+ edge_count = 0 ;
2047+ }
2048+
2049+ if (rel -> type_count == 0 && !found_match ) {
2050+ /* No type filter — check ANY edge */
2051+ cbm_edge_t * all_edges = NULL ;
2052+ int all_count = 0 ;
2053+ if (bound_is_target ) {
2054+ cbm_store_find_edges_by_target_type (store , bound_node -> id ,
2055+ NULL , & all_edges , & all_count );
2056+ } else {
2057+ cbm_store_find_edges_by_source_type (store , bound_node -> id ,
2058+ NULL , & all_edges , & all_count );
2059+ }
2060+ if (all_count > 0 ) found_match = true;
2061+ for (int ei = 0 ; ei < all_count ; ei ++ ) {
2062+ free ((void * )all_edges [ei ].project );
2063+ free ((void * )all_edges [ei ].type );
2064+ free ((void * )all_edges [ei ].properties_json );
2065+ }
2066+ free (all_edges );
2067+ }
2068+
2069+ return !found_match ;
2070+ }
2071+ }
2072+
2073+ /* SLOW PATH: Full subquery expansion for complex patterns.
2074+ * Used when no variable is bound from outer scope, or multi-hop patterns. */
2075+ const char * start_var = sp -> nodes [0 ].variable ;
2076+ cbm_node_t * scanned = NULL ;
2077+ int scan_count = 0 ;
2078+ cbm_node_t * outer_node = start_var ? binding_get (b , start_var ) : NULL ;
2079+
2080+ if (outer_node ) {
2081+ scanned = calloc (1 , sizeof (cbm_node_t ));
2082+ scanned [0 ] = * outer_node ;
2083+ scanned [0 ].name = outer_node -> name ? heap_strdup (outer_node -> name ) : NULL ;
2084+ scanned [0 ].label = outer_node -> label ? heap_strdup (outer_node -> label ) : NULL ;
2085+ scanned [0 ].file_path = outer_node -> file_path ? heap_strdup (outer_node -> file_path ) : NULL ;
2086+ scanned [0 ].project = outer_node -> project ? heap_strdup (outer_node -> project ) : NULL ;
2087+ scanned [0 ].qualified_name = outer_node -> qualified_name ? heap_strdup (outer_node -> qualified_name ) : NULL ;
2088+ scan_count = 1 ;
2089+ } else {
2090+ scan_pattern_nodes (store , project , max_rows , & sp -> nodes [0 ],
2091+ & scanned , & scan_count );
2092+ }
2093+
2094+ if (scan_count == 0 ) {
2095+ free (scanned );
2096+ return true;
2097+ }
2098+
2099+ const char * var = start_var ? start_var : "_ne" ;
2100+ int sub_cap = scan_count > 4 ? scan_count : 4 ;
2101+ binding_t * sub_bindings = calloc (sub_cap , sizeof (binding_t ));
2102+ int sub_count = 0 ;
2103+ for (int i = 0 ; i < scan_count && sub_count < sub_cap ; i ++ ) {
2104+ binding_set (& sub_bindings [sub_count ], var , & scanned [i ]);
2105+ sub_count ++ ;
2106+ }
2107+ free (scanned );
2108+
2109+ if (sub_count > 0 && sp -> rel_count > 0 ) {
2110+ expand_pattern_rels (store , sp , & sub_bindings , & sub_count , & sub_cap ,
2111+ & var , false);
2112+ }
2113+
2114+ bool any_match = false;
2115+ cbm_where_clause_t * inner_w = (cbm_where_clause_t * )e -> sub_where ;
2116+ for (int i = 0 ; i < sub_count && !any_match ; i ++ ) {
2117+ bool pass = inner_w ? eval_where (inner_w , & sub_bindings [i ], store , project , max_rows ) : true;
2118+ if (pass ) any_match = true;
2119+ }
2120+ for (int i = 0 ; i < sub_count ; i ++ ) {
2121+ for (int v = 0 ; v < sub_bindings [i ].var_count ; v ++ ) {
2122+ node_fields_free (& sub_bindings [i ].var_nodes [v ]);
2123+ }
2124+ }
2125+ free (sub_bindings );
2126+ return !any_match ;
2127+ }
18992128 }
19002129 return true;
19012130}
19022131
19032132/* Evaluate WHERE clause — uses expression tree if available, falls back to legacy */
1904- static bool eval_where (const cbm_where_clause_t * w , binding_t * b ) {
2133+ static bool eval_where (const cbm_where_clause_t * w , binding_t * b , cbm_store_t * store ,
2134+ const char * project , int max_rows ) {
19052135 if (!w ) {
19062136 return true;
19072137 }
19082138 if (w -> root ) {
1909- return eval_expr (w -> root , b );
2139+ return eval_expr (w -> root , b , store , project , max_rows );
19102140 }
19112141
19122142 /* Legacy flat evaluation */
@@ -2046,7 +2276,7 @@ static const char *eval_case_expr(const cbm_case_expr_t *k, binding_t *b) {
20462276 return "" ;
20472277 }
20482278 for (int i = 0 ; i < k -> branch_count ; i ++ ) {
2049- if (eval_expr (k -> branches [i ].when_expr , b )) {
2279+ if (eval_expr (k -> branches [i ].when_expr , b , NULL , NULL , 0 )) {
20502280 return k -> branches [i ].then_val ? k -> branches [i ].then_val : "" ;
20512281 }
20522282 }
@@ -2429,9 +2659,9 @@ static int execute_single(cbm_store_t *store, cbm_query_t *q, const char *projec
24292659 bool pass = true;
24302660 if (q -> where && pat0 -> rel_count > 0 ) {
24312661 /* With expression tree, evaluate full tree — unbound vars pass through */
2432- pass = eval_where (q -> where , & b );
2662+ pass = eval_where (q -> where , & b , store , project , max_rows );
24332663 } else if (q -> where && pat0 -> rel_count == 0 ) {
2434- pass = eval_where (q -> where , & b );
2664+ pass = eval_where (q -> where , & b , store , project , max_rows );
24352665 }
24362666
24372667 if (pass ) {
@@ -2532,7 +2762,7 @@ static int execute_single(cbm_store_t *store, cbm_query_t *q, const char *projec
25322762 if (q -> where && (pat0 -> rel_count > 0 || q -> pattern_count > 1 )) {
25332763 int kept = 0 ;
25342764 for (int i = 0 ; i < bind_count ; i ++ ) {
2535- if (eval_where (q -> where , & bindings [i ])) {
2765+ if (eval_where (q -> where , & bindings [i ], store , project , max_rows )) {
25362766 if (kept != i ) {
25372767 bindings [kept ] = bindings [i ];
25382768 }
@@ -2840,7 +3070,7 @@ static int execute_single(cbm_store_t *store, cbm_query_t *q, const char *projec
28403070 if (q -> post_with_where ) {
28413071 int kept = 0 ;
28423072 for (int i = 0 ; i < bind_count ; i ++ ) {
2843- if (eval_where (q -> post_with_where , & bindings [i ])) {
3073+ if (eval_where (q -> post_with_where , & bindings [i ], store , project , max_rows )) {
28443074 if (kept != i ) {
28453075 bindings [kept ] = bindings [i ];
28463076 }
0 commit comments