From 012847370a61516f56c8212d79d132f62cf059b1 Mon Sep 17 00:00:00 2001 From: Colm McHugh Date: Thu, 30 Apr 2026 12:20:52 +0000 Subject: [PATCH] Fix segfault in EXPLAIN with LEFT JOIN and correlated subqueries (#8548) Fix by adding a NULL check that skips discarded subplan entries. This is needed because PostgreSQL's setrefs.c (set_plan_references) resolves AlternativeSubPlan nodes by picking one alternative and setting the discarded subplan entries to NULL in PlannedStmt->subplans. PlanContainsDistributedSubPlanRTE() iterated this list without a NULL check, hitting the segfault reported in #8548. Also refactor the function to accept DistributedPlanningContext* instead of a raw List*, making it evident that we're iterating PostgreSQL's PlannedStmt->subplans (where NULL entries are expected). Note: PG16 errors on the test query with 'outer joins and pseudo-const quals are not supported', a known limitation with PG16. Added alternative goldfile for PG16. --- .../distributed/planner/distributed_planner.c | 23 +- .../regress/expected/subquery_in_where.out | 65 + .../regress/expected/subquery_in_where_0.out | 1489 +++++++++++++++++ src/test/regress/sql/subquery_in_where.sql | 59 + 4 files changed, 1631 insertions(+), 5 deletions(-) create mode 100644 src/test/regress/expected/subquery_in_where_0.out diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index d80216b3682..dee156cfaa9 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -83,7 +83,7 @@ int PlannerLevel = 0; static bool ListContainsDistributedTableRTE(List *rangeTableList, bool *maybeHasForeignDistributedTable); -static bool PlanContainsDistributedSubPlanRTE(List *subPlanList); +static bool PlanContainsDistributedSubPlanRTE(DistributedPlanningContext *planContext); static PlannedStmt * CreateDistributedPlannedStmt(DistributedPlanningContext * planContext); static PlannedStmt * InlineCtesAndCreateDistributedPlannedStmt(uint64 planId, @@ -436,8 +436,9 @@ ListContainsDistributedTableRTE(List *rangeTableList, /* - * PlanContainsDistributedSubPlanRTE checks whether any of the subplans in the given - * subPlanList is a Read Intermediate Result function scan. + * PlanContainsDistributedSubPlanRTE checks whether any of the subplans in the + * plan context's PlannedStmt->subplans list is a Read Intermediate Result + * function scan. * * It is used by the check after standard_planner() to determine whether the plan * still requires distributed planning; in addition to checking the range table for @@ -446,14 +447,26 @@ ListContainsDistributedTableRTE(List *rangeTableList, * that distributed planning is required. */ static bool -PlanContainsDistributedSubPlanRTE(List *subPlanList) +PlanContainsDistributedSubPlanRTE(DistributedPlanningContext *planContext) { + /* + * We iterate over planContext->plan->subplans, which is PostgreSQL's + * PlannedStmt->subplans list. PostgreSQL's setrefs.c (set_plan_references) + * resolves AlternativeSubPlan nodes by picking one alternative and setting + * the discarded subplan entries to NULL. We must therefore skip NULL entries. + */ + List *subPlanList = planContext->plan->subplans; ListCell *subPlanCell = NULL; foreach(subPlanCell, subPlanList) { Node *planRoot = (Node *) lfirst(subPlanCell); + if (planRoot == NULL) + { + continue; + } + if (!IsA(planRoot, FunctionScan)) { continue; @@ -2975,7 +2988,7 @@ CheckPostPlanDistribution(DistributedPlanningContext *planContext, bool /* ..or a distributed subplan */ planHasDistribution = planHasDistribution || PlanContainsDistributedSubPlanRTE( - planContext->plan->subplans); + planContext); /* * The plan has a distributed relation, so we know for sure that diff --git a/src/test/regress/expected/subquery_in_where.out b/src/test/regress/expected/subquery_in_where.out index fcb8180a1b0..2a586ca8724 100644 --- a/src/test/regress/expected/subquery_in_where.out +++ b/src/test/regress/expected/subquery_in_where.out @@ -1414,6 +1414,71 @@ WHERE true OR NOT EXISTS (SELECT 1 FROM t1); 1 (1 row) +-- Test crash fix for issue #8548 +-- A query with LEFT JOIN to a distributed table and correlated subqueries +-- could crash because PostgreSQL's setrefs.c sets subplan list entries to +-- NULL when AlternativeSubPlan resolution discards unused alternatives. +-- PlanContainsDistributedSubPlanRTE must skip NULL entries. +CREATE TABLE t4 (vkey integer, pkey integer, c30 integer, c31 integer, c32 text); +CREATE TABLE t5 (vkey integer, pkey integer, c33 text, c34 integer, c35 integer, + c36 timestamp without time zone); +CREATE TABLE t2 (vkey integer, pkey integer, c15 numeric, c16 timestamp without time zone, + c17 text, c18 text, c19 timestamp without time zone, + c20 timestamp without time zone, c21 integer); +CREATE TABLE t22 (vkey integer, pkey integer, c37 numeric, c38 text, c39 numeric, + c40 numeric, c41 numeric, c42 integer, + c43 timestamp without time zone, c44 numeric, + colocated_key numeric); +SELECT create_distributed_table('t22', 'colocated_key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- This query should not crash (issue #8548) +SELECT + 70 AS c_0 +FROM + ( + SELECT + ( + EXISTS ( + SELECT ref_5.c33 AS c_0 + FROM t5 AS ref_5 + WHERE (make_timestamp(2001, 7, 13, 17, 53, 31)) = (ref_1.c43) + ) + ) AS c_0 + FROM + ( + t4 AS ref_0 + LEFT OUTER JOIN t22 AS ref_1 + ON (ref_0.vkey = ref_1.vkey) + ) + WHERE + ( + (ref_0.c31) >= ( + SELECT ref_1.pkey AS c_0 + FROM t2 AS ref_2 + WHERE (true) < ((ref_2.c17) ^@ (ref_0.c32)) + ORDER BY c_0 DESC + LIMIT 1 + ) + ) IN ( + SELECT (ref_1.c40) <= (ref_1.c37) AS c_0 + FROM t7 AS ref_4 + WHERE NOT ((ref_1.c40) <> (ref_1.c41)) + ) + ) AS subq_0 +WHERE + (TRUE) < (TRUE); + c_0 +--------------------------------------------------------------------- +(0 rows) + +DROP TABLE t4; +DROP TABLE t5; +DROP TABLE t2; +DROP TABLE t22; DROP TABLE local_table; DROP TABLE t0; DROP TABLE t1; diff --git a/src/test/regress/expected/subquery_in_where_0.out b/src/test/regress/expected/subquery_in_where_0.out new file mode 100644 index 00000000000..314ae7a78b7 --- /dev/null +++ b/src/test/regress/expected/subquery_in_where_0.out @@ -0,0 +1,1489 @@ +-- =================================================================== +-- test recursive planning functionality with subqueries in WHERE +-- =================================================================== +CREATE SCHEMA subquery_in_where; +SET search_path TO subquery_in_where, public; +SET client_min_messages TO DEBUG1; +--CTEs can be used as a recurring tuple with subqueries in WHERE +WITH event_id + AS MATERIALIZED (SELECT user_id AS events_user_id, + time AS events_time, + event_type + FROM events_table) +SELECT Count(*) +FROM event_id +WHERE events_user_id IN (SELECT user_id + FROM users_table); +DEBUG: generating subplan XXX_1 for CTE event_id: SELECT user_id AS events_user_id, "time" AS events_time, event_type FROM public.events_table +DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM public.users_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.events_user_id, intermediate_result.events_time, intermediate_result.event_type FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer, events_time timestamp without time zone, event_type integer)) event_id WHERE (events_user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))) + count +--------------------------------------------------------------------- + 101 +(1 row) + +--Correlated subqueries can not be used in WHERE clause +WITH event_id + AS(SELECT user_id AS events_user_id, + time AS events_time, + event_type + FROM events_table) +SELECT Count(*) +FROM event_id +WHERE (events_user_id, random()) IN (SELECT user_id, 1 + FROM users_table + WHERE users_table.time = events_time); +DEBUG: CTE event_id is going to be inlined via distributed planning +DEBUG: generating subplan XXX_1 for CTE event_id: SELECT user_id AS events_user_id, "time" AS events_time, event_type FROM public.events_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.events_user_id, intermediate_result.events_time, intermediate_result.event_type FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer, events_time timestamp without time zone, event_type integer)) event_id WHERE ((events_user_id, random()) OPERATOR(pg_catalog.=) ANY (SELECT users_table.user_id, 1 FROM public.users_table WHERE (users_table."time" OPERATOR(pg_catalog.=) event_id.events_time))) +ERROR: correlated subqueries are not supported when the FROM clause contains a CTE or subquery +-- Recurring tuples as empty join tree +SELECT * +FROM (SELECT 1 AS id, 2 AS value_1, 3 AS value_3 + UNION ALL SELECT 2 as id, 3 as value_1, 4 as value_3) AS tt1 +WHERE id IN (SELECT user_id + FROM events_table) +ORDER BY 1; +DEBUG: generating subplan XXX_1 for subquery SELECT 1 AS id, 2 AS value_1, 3 AS value_3 UNION ALL SELECT 2 AS id, 3 AS value_1, 4 AS value_3 +DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM public.events_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id, value_1, value_3 FROM (SELECT intermediate_result.id, intermediate_result.value_1, intermediate_result.value_3 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, value_1 integer, value_3 integer)) tt1 WHERE (id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))) ORDER BY id + id | value_1 | value_3 +--------------------------------------------------------------------- + 1 | 2 | 3 + 2 | 3 | 4 +(2 rows) + +-- Recurring tuples in from clause as CTE and SET operation in WHERE clause +SELECT Count(*) +FROM (WITH event_id AS + (SELECT user_id AS events_user_id, time AS events_time, event_type + FROM events_table) + SELECT events_user_id, events_time, event_type + FROM event_id + ORDER BY 1,2,3 + LIMIT 10) AS sub_table +WHERE events_user_id IN ( + (SELECT user_id + FROM users_table + ORDER BY 1 + LIMIT 10) + UNION ALL + (SELECT value_1 + FROM users_table + ORDER BY 1 + limit 10)); +DEBUG: CTE event_id is going to be inlined via distributed planning +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.users_table ORDER BY user_id LIMIT 10 +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_2 for subquery SELECT value_1 FROM public.users_table ORDER BY value_1 LIMIT 10 +DEBUG: generating subplan XXX_3 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION ALL SELECT intermediate_result.value_1 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer) +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_4 for subquery SELECT events_user_id, events_time, event_type FROM (SELECT events_table.user_id AS events_user_id, events_table."time" AS events_time, events_table.event_type FROM public.events_table) event_id ORDER BY events_user_id, events_time, event_type LIMIT 10 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.events_user_id, intermediate_result.events_time, intermediate_result.event_type FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer, events_time timestamp without time zone, event_type integer)) sub_table WHERE (events_user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))) + count +--------------------------------------------------------------------- + 10 +(1 row) + +-- Recurring tuples in from clause as SET operation on recursively plannable +-- queries and CTE in WHERE clause +SELECT + * +FROM + ( + (SELECT + user_id + FROM + users_table + ORDER BY + user_id ASC + LIMIT + 10 + ) + UNION ALL + (SELECT + value_1 + FROM + users_table + ORDER BY + value_1 ASC + LIMIT + 10 + ) + ) as SUB_TABLE +WHERE + user_id +IN + ( + WITH event_id AS ( + SELECT + user_id as events_user_id, time as events_time, event_type + FROM + events_table + ) + SELECT + events_user_id + FROM + event_id + ORDER BY + events_user_id + LIMIT + 10 + ); +DEBUG: CTE event_id is going to be inlined via distributed planning +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_1 for subquery SELECT events_user_id FROM (SELECT events_table.user_id AS events_user_id, events_table."time" AS events_time, events_table.event_type FROM public.events_table) event_id ORDER BY events_user_id LIMIT 10 +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM public.users_table ORDER BY user_id LIMIT 10 +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_3 for subquery SELECT value_1 FROM public.users_table ORDER BY value_1 LIMIT 10 +DEBUG: generating subplan XXX_4 for subquery SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer) UNION ALL SELECT intermediate_result.value_1 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id FROM (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) sub_table WHERE (user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.events_user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer))) + user_id +--------------------------------------------------------------------- + 1 + 1 + 1 + 1 + 1 + 1 + 1 +(7 rows) + +-- Complex target list in WHERE clause +SELECT + COUNT(*) +FROM + (SELECT + user_id as events_user_id, time as events_time, event_type + FROM + events_table + ORDER BY + 1,2 + LIMIT + 10 + ) as SUB_TABLE +WHERE + events_user_id +<=ANY ( + SELECT + max(abs(user_id * 1) + mod(user_id, 3)) as val_1 + FROM + users_table + GROUP BY + user_id +); +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_1 for subquery SELECT user_id AS events_user_id, "time" AS events_time, event_type FROM public.events_table ORDER BY user_id, "time" LIMIT 10 +DEBUG: generating subplan XXX_2 for subquery SELECT max((abs((user_id OPERATOR(pg_catalog.*) 1)) OPERATOR(pg_catalog.+) mod(user_id, 3))) AS val_1 FROM public.users_table GROUP BY user_id +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.events_user_id, intermediate_result.events_time, intermediate_result.event_type FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer, events_time timestamp without time zone, event_type integer)) sub_table WHERE (events_user_id OPERATOR(pg_catalog.<=) ANY (SELECT intermediate_result.val_1 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(val_1 integer))) + count +--------------------------------------------------------------------- + 10 +(1 row) + +-- DISTINCT clause in WHERE +SELECT + COUNT(*) +FROM + (SELECT + user_id as events_user_id, time as events_time, event_type + FROM + events_table + LIMIT + 10 + ) as SUB_TABLE +WHERE + events_user_id +IN ( + SELECT + distinct user_id + FROM + users_table + GROUP BY + user_id +); +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_1 for subquery SELECT user_id AS events_user_id, "time" AS events_time, event_type FROM public.events_table LIMIT 10 +DEBUG: generating subplan XXX_2 for subquery SELECT DISTINCT user_id FROM public.users_table GROUP BY user_id +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.events_user_id, intermediate_result.events_time, intermediate_result.event_type FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer, events_time timestamp without time zone, event_type integer)) sub_table WHERE (events_user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))) + count +--------------------------------------------------------------------- + 10 +(1 row) + +-- AND in WHERE clause +SELECT + COUNT(*) +FROM + (SELECT + user_id as events_user_id, time as events_time, event_type + FROM + events_table + ORDER BY + 1,2,3 + LIMIT + 10 + ) as SUB_TABLE +WHERE + events_user_id +>=ANY ( + SELECT + min(user_id) + FROM + users_table + GROUP BY + user_id +) +AND + events_user_id +<=ANY ( + SELECT + max(user_id) + FROM + users_table + GROUP BY + user_id +); +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_1 for subquery SELECT user_id AS events_user_id, "time" AS events_time, event_type FROM public.events_table ORDER BY user_id, "time", event_type LIMIT 10 +DEBUG: generating subplan XXX_2 for subquery SELECT min(user_id) AS min FROM public.users_table GROUP BY user_id +DEBUG: generating subplan XXX_3 for subquery SELECT max(user_id) AS max FROM public.users_table GROUP BY user_id +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.events_user_id, intermediate_result.events_time, intermediate_result.event_type FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer, events_time timestamp without time zone, event_type integer)) sub_table WHERE ((events_user_id OPERATOR(pg_catalog.>=) ANY (SELECT intermediate_result.min FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(min integer))) AND (events_user_id OPERATOR(pg_catalog.<=) ANY (SELECT intermediate_result.max FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(max integer)))) + count +--------------------------------------------------------------------- + 10 +(1 row) + +-- AND in WHERE clause, part of the AND is pushdownable other is not +SELECT + COUNT(*) +FROM + (SELECT + user_id as events_user_id, time as events_time, event_type + FROM + events_table + ORDER BY + 1,2,3 + LIMIT + 10 + ) as SUB_TABLE +WHERE + events_user_id +>=ANY ( + SELECT + min(user_id) + FROM + users_table + GROUP BY + user_id +) +AND + events_user_id +<=ANY ( + SELECT + max(value_2) + FROM + users_table + GROUP BY + user_id +); +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_1 for subquery SELECT user_id AS events_user_id, "time" AS events_time, event_type FROM public.events_table ORDER BY user_id, "time", event_type LIMIT 10 +DEBUG: generating subplan XXX_2 for subquery SELECT min(user_id) AS min FROM public.users_table GROUP BY user_id +DEBUG: generating subplan XXX_3 for subquery SELECT max(value_2) AS max FROM public.users_table GROUP BY user_id +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.events_user_id, intermediate_result.events_time, intermediate_result.event_type FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(events_user_id integer, events_time timestamp without time zone, event_type integer)) sub_table WHERE ((events_user_id OPERATOR(pg_catalog.>=) ANY (SELECT intermediate_result.min FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(min integer))) AND (events_user_id OPERATOR(pg_catalog.<=) ANY (SELECT intermediate_result.max FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(max integer)))) + count +--------------------------------------------------------------------- + 10 +(1 row) + +-- Planning subqueries in WHERE clause in CTE recursively +WITH cte AS ( + SELECT + * + FROM + (SELECT + * + FROM + users_table + ORDER BY + user_id ASC, + value_2 DESC + LIMIT + 10 + ) as sub_table + WHERE + user_id + IN + (SELECT + value_2 + FROM + events_table + ) +) +SELECT + COUNT(*) +FROM + cte; +DEBUG: CTE cte is going to be inlined via distributed planning +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table ORDER BY user_id, value_2 DESC LIMIT 10 +DEBUG: generating subplan XXX_2 for subquery SELECT value_2 FROM public.events_table +DEBUG: generating subplan XXX_3 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) sub_table WHERE (user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer))) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) cte + count +--------------------------------------------------------------------- + 10 +(1 row) + +-- Planing subquery in WHERE clause in FROM clause of a subquery recursively +SELECT + COUNT(*) +FROM + (SELECT + * + FROM + (SELECT + * + FROM + users_table + ORDER BY + user_id ASC, + value_2 DESC + LIMIT + 10 + ) as sub_table_1 + WHERE + user_id + IN + (SELECT + value_2 + FROM + events_table + ) + ) as sub_table_2; +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_1 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table ORDER BY user_id, value_2 DESC LIMIT 10 +DEBUG: generating subplan XXX_2 for subquery SELECT value_2 FROM public.events_table +DEBUG: generating subplan XXX_3 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) sub_table_1 WHERE (user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer))) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) sub_table_2 + count +--------------------------------------------------------------------- + 10 +(1 row) + +-- Recurring table in the FROM clause of a subquery in the FROM clause +-- Recurring table is created by joining a two recurrign table +SELECT + SUM(user_id) +FROM + (SELECT + * + FROM + (SELECT + user_id + FROM + users_table + ORDER BY + user_id + LIMIT 10) as t1 + INNER JOIN + (SELECT + user_id as user_id_2 + FROM + users_table + ORDER BY + user_id + LIMIT + 10) as t2 + ON + t1.user_id = t2.user_id_2 + WHERE + t1.user_id + IN + (SELECT + value_2 + FROM + events_table) + ) as t3 +WHERE + user_id +>ANY + (SELECT + min(user_id) + FROM + events_table + GROUP BY + user_id); +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.users_table ORDER BY user_id LIMIT 10 +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_2 for subquery SELECT user_id AS user_id_2 FROM public.users_table ORDER BY user_id LIMIT 10 +DEBUG: generating subplan XXX_3 for subquery SELECT value_2 FROM public.events_table +DEBUG: generating subplan XXX_4 for subquery SELECT t1.user_id, t2.user_id_2 FROM ((SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) t1 JOIN (SELECT intermediate_result.user_id_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id_2 integer)) t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id_2))) WHERE (t1.user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer))) +DEBUG: generating subplan XXX_5 for subquery SELECT min(user_id) AS min FROM public.events_table GROUP BY user_id +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT sum(user_id) AS sum FROM (SELECT intermediate_result.user_id, intermediate_result.user_id_2 FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, user_id_2 integer)) t3 WHERE (user_id OPERATOR(pg_catalog.>) ANY (SELECT intermediate_result.min FROM read_intermediate_result('XXX_5'::text, 'binary'::citus_copy_format) intermediate_result(min integer))) + sum +--------------------------------------------------------------------- + 18 +(1 row) + +-- Same example with the above query, but now check the rows with EXISTS +SELECT + SUM(user_id) +FROM + (SELECT + * + FROM + (SELECT + user_id + FROM + users_table + ORDER BY + user_id + LIMIT 10) as t1 + INNER JOIN + (SELECT + user_id as user_id_2 + FROM + users_table + ORDER BY + user_id + LIMIT + 10) as t2 + ON + t1.user_id = t2.user_id_2 + WHERE + t1.user_id + IN + (SELECT + value_2 + FROM + events_table) + ) as t3 +WHERE EXISTS + (SELECT + 1,2 + FROM + events_table + WHERE + events_table.value_2 = events_table.user_id); +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.users_table ORDER BY user_id LIMIT 10 +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_2 for subquery SELECT user_id AS user_id_2 FROM public.users_table ORDER BY user_id LIMIT 10 +DEBUG: generating subplan XXX_3 for subquery SELECT value_2 FROM public.events_table +DEBUG: generating subplan XXX_4 for subquery SELECT t1.user_id, t2.user_id_2 FROM ((SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) t1 JOIN (SELECT intermediate_result.user_id_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id_2 integer)) t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id_2))) WHERE (t1.user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer))) +DEBUG: generating subplan XXX_5 for subquery SELECT 1, 2 FROM public.events_table WHERE (value_2 OPERATOR(pg_catalog.=) user_id) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT sum(user_id) AS sum FROM (SELECT intermediate_result.user_id, intermediate_result.user_id_2 FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, user_id_2 integer)) t3 WHERE (EXISTS (SELECT intermediate_result."?column?", intermediate_result."?column?_1" FROM read_intermediate_result('XXX_5'::text, 'binary'::citus_copy_format) intermediate_result("?column?" integer, "?column?_1" integer))) + sum +--------------------------------------------------------------------- + 67 +(1 row) + +-- Same query with the above one, yet now we check the row's NON-existence +-- by NOT EXISTS. Note that, max value_2 of events_table is 5 +SELECT + SUM(user_id) +FROM + (SELECT + * + FROM + (SELECT + user_id + FROM + users_table + ORDER BY + user_id + LIMIT 10) as t1 + INNER JOIN + (SELECT + user_id as user_id_2 + FROM + users_table + ORDER BY + user_id + LIMIT + 10) as t2 + ON + t1.user_id = t2.user_id_2 + WHERE + t1.user_id + IN + (SELECT + value_2 + FROM + events_table) + ) as t3 +WHERE NOT EXISTS + (SELECT + 1,2 + FROM + events_table + WHERE + events_table.value_2 = events_table.user_id + 6); +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.users_table ORDER BY user_id LIMIT 10 +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_2 for subquery SELECT user_id AS user_id_2 FROM public.users_table ORDER BY user_id LIMIT 10 +DEBUG: generating subplan XXX_3 for subquery SELECT value_2 FROM public.events_table +DEBUG: generating subplan XXX_4 for subquery SELECT t1.user_id, t2.user_id_2 FROM ((SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer)) t1 JOIN (SELECT intermediate_result.user_id_2 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id_2 integer)) t2 ON ((t1.user_id OPERATOR(pg_catalog.=) t2.user_id_2))) WHERE (t1.user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_3'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer))) +DEBUG: generating subplan XXX_5 for subquery SELECT 1, 2 FROM public.events_table WHERE (value_2 OPERATOR(pg_catalog.=) (user_id OPERATOR(pg_catalog.+) 6)) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT sum(user_id) AS sum FROM (SELECT intermediate_result.user_id, intermediate_result.user_id_2 FROM read_intermediate_result('XXX_4'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, user_id_2 integer)) t3 WHERE (NOT (EXISTS (SELECT intermediate_result."?column?", intermediate_result."?column?_1" FROM read_intermediate_result('XXX_5'::text, 'binary'::citus_copy_format) intermediate_result("?column?" integer, "?column?_1" integer)))) + sum +--------------------------------------------------------------------- + 67 +(1 row) + +-- Check the existence of row by comparing it with the result of subquery in +-- WHERE clause. Note that subquery is planned recursively since there is no +-- distributed table in the from +SELECT + * +FROM + (SELECT + user_id, value_1 + FROM + users_table + ORDER BY + user_id ASC, + value_1 ASC + LIMIT 10) as t3 +WHERE row(user_id, value_1) = + (SELECT + min(user_id) + 1, min(user_id) + 1 + FROM + events_table); +DEBUG: generating subplan XXX_1 for subquery SELECT (min(user_id) OPERATOR(pg_catalog.+) 1), (min(user_id) OPERATOR(pg_catalog.+) 1) FROM public.events_table +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_2 for subquery SELECT user_id, value_1 FROM public.users_table ORDER BY user_id, value_1 LIMIT 10 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT user_id, value_1 FROM (SELECT intermediate_result.user_id, intermediate_result.value_1 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, value_1 integer)) t3 WHERE ((user_id, value_1) OPERATOR(pg_catalog.=) (SELECT intermediate_result."?column?", intermediate_result."?column?_1" FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result("?column?" integer, "?column?_1" integer))) + user_id | value_1 +--------------------------------------------------------------------- +(0 rows) + +-- Recursively plan subquery in WHERE clause when the FROM clause has a subquery +-- generated by generate_series function +SELECT + * +FROM + (SELECT + * + FROM + generate_series(1,10) + ) as gst +WHERE + generate_series +IN + (SELECT + value_2 + FROM + events_table + ) +ORDER BY + generate_series ASC; +DEBUG: generating subplan XXX_1 for subquery SELECT value_2 FROM public.events_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT generate_series FROM (SELECT generate_series.generate_series FROM generate_series(1, 10) generate_series(generate_series)) gst WHERE (generate_series OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_2 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_2 integer))) ORDER BY generate_series + generate_series +--------------------------------------------------------------------- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +-- Similar to the test above, now we also have a generate_series in WHERE clause +SELECT + * +FROM + (SELECT + * + FROM + generate_series(1,10) + ) as gst +WHERE + generate_series +IN + (SELECT + user_id + FROM + users_table + WHERE + user_id + IN + (SELECT + * + FROM + generate_series(1,3) + ) + ) +ORDER BY + generate_series ASC; +DEBUG: generating subplan XXX_1 for subquery SELECT user_id FROM public.users_table WHERE (user_id OPERATOR(pg_catalog.=) ANY (SELECT generate_series.generate_series FROM generate_series(1, 3) generate_series(generate_series))) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT generate_series FROM (SELECT generate_series.generate_series FROM generate_series(1, 10) generate_series(generate_series)) gst WHERE (generate_series OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))) ORDER BY generate_series + generate_series +--------------------------------------------------------------------- + 1 + 2 + 3 +(3 rows) + +-- non-colocated subquery in WHERE clause ANDed with false +SELECT count(*) +FROM users_Table +WHERE (FALSE AND EXISTS (SELECT * FROM events_table)); + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- multiple non-colocated subqueries in WHERE clause ANDed with false +SELECT count(*) +FROM users_Table +WHERE value_1 IN + (SELECT value_1 + FROM users_Table) OR (FALSE AND EXISTS (SELECT * FROM events_table)); +DEBUG: generating subplan XXX_1 for subquery SELECT value_1 FROM public.users_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.users_table WHERE ((value_1 OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer))) OR (false AND (EXISTS (SELECT events_table.user_id, events_table."time", events_table.event_type, events_table.value_2, events_table.value_3, events_table.value_4 FROM public.events_table)))) + count +--------------------------------------------------------------------- + 101 +(1 row) + +-- multiple non-colocated subqueries in WHERE clause ANDed with false +SELECT count(*) +FROM users_Table +WHERE value_1 IN + (SELECT value_1 + FROM users_Table) AND (FALSE AND EXISTS (SELECT * FROM events_table)); + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- non-colocated subquery in WHERE clause ANDed with true +SELECT count(*) +FROM users_Table +WHERE (TRUE AND EXISTS (SELECT * FROM events_table)); +DEBUG: generating subplan XXX_1 for subquery SELECT user_id, "time", event_type, value_2, value_3, value_4 FROM public.events_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.users_table WHERE (true AND (EXISTS (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint)))) + count +--------------------------------------------------------------------- + 101 +(1 row) + +-- multiple non-colocated subqueries in WHERE clause ANDed with true +SELECT count(*) +FROM users_Table +WHERE value_1 IN + (SELECT value_1 + FROM users_Table) OR (EXISTS (SELECT * FROM events_table)); +DEBUG: generating subplan XXX_1 for subquery SELECT value_1 FROM public.users_table +DEBUG: generating subplan XXX_2 for subquery SELECT user_id, "time", event_type, value_2, value_3, value_4 FROM public.events_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.users_table WHERE ((value_1 OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(value_1 integer))) OR (EXISTS (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.event_type, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, event_type integer, value_2 integer, value_3 double precision, value_4 bigint)))) + count +--------------------------------------------------------------------- + 101 +(1 row) + +-- correlated subquery with aggregate in WHERE +SELECT + * +FROM + users_table +WHERE + user_id IN + ( + SELECT + SUM(events_table.user_id) + FROM + events_table + WHERE + users_table.user_id = events_table.user_id + ) +; + user_id | time | value_1 | value_2 | value_3 | value_4 +--------------------------------------------------------------------- +(0 rows) + +-- correlated subquery with aggregate in HAVING +SELECT + * +FROM + users_table +WHERE + user_id IN + ( + SELECT + SUM(events_table.user_id) + FROM + events_table + WHERE + events_table.user_id = users_table.user_id + HAVING + MIN(value_2) > 2 + ) +; + user_id | time | value_1 | value_2 | value_3 | value_4 +--------------------------------------------------------------------- +(0 rows) + +-- Local tables also planned recursively, so using it as part of the FROM clause +-- make the clause recurring +CREATE TABLE local_table(id int, value_1 int); +INSERT INTO local_table VALUES(1,1), (2,2); +SELECT + * +FROM + (SELECT + * + FROM + local_table) as sub_table +WHERE + id +IN + (SELECT + user_id + FROM + users_table) +ORDER BY id; +DEBUG: generating subplan XXX_1 for subquery SELECT id, value_1 FROM subquery_in_where.local_table +DEBUG: generating subplan XXX_2 for subquery SELECT user_id FROM public.users_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT id, value_1 FROM (SELECT intermediate_result.id, intermediate_result.value_1 FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, value_1 integer)) sub_table WHERE (id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.user_id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer))) ORDER BY id + id | value_1 +--------------------------------------------------------------------- + 1 | 1 + 2 | 2 +(2 rows) + +-- Use local table in WHERE clause +SELECT + COUNT(*) +FROM + (SELECT + * + FROM + users_table + ORDER BY + user_id + LIMIT + 10) as sub_table +WHERE + user_id +IN + (SELECT + id + FROM + local_table); +DEBUG: generating subplan XXX_1 for subquery SELECT id FROM subquery_in_where.local_table +DEBUG: push down of limit count: 10 +DEBUG: generating subplan XXX_2 for subquery SELECT user_id, "time", value_1, value_2, value_3, value_4 FROM public.users_table ORDER BY user_id LIMIT 10 +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.user_id, intermediate_result."time", intermediate_result.value_1, intermediate_result.value_2, intermediate_result.value_3, intermediate_result.value_4 FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(user_id integer, "time" timestamp without time zone, value_1 integer, value_2 integer, value_3 double precision, value_4 bigint)) sub_table WHERE (user_id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer))) + count +--------------------------------------------------------------------- + 10 +(1 row) + +-- basic NOT IN correlated subquery +SELECT + count(*) +FROM + events_table e +WHERE + value_2 NOT IN (SELECT value_2 FROM users_table WHERE user_id = e.user_id); + count +--------------------------------------------------------------------- + 7 +(1 row) + +-- correlated subquery with limit +SELECT + count(*) +FROM + events_table e +WHERE + value_2 IN (SELECT value_2 FROM users_table WHERE user_id = e.user_id ORDER BY value_2 LIMIT 1); + count +--------------------------------------------------------------------- + 10 +(1 row) + +-- correlated subquery with distinct +SELECT + count(*) +FROM + events_table e +WHERE + value_2 IN (SELECT DISTINCT (value_3) FROM users_table WHERE user_id = e.user_id); + count +--------------------------------------------------------------------- + 90 +(1 row) + +-- correlated subquery with aggregate +SELECT + count(*) +FROM + events_table e +WHERE + value_2 = (SELECT max(value_2) FROM users_table WHERE user_id = e.user_id); + count +--------------------------------------------------------------------- + 11 +(1 row) + +-- correlated subquery with window function +SELECT + count(*) +FROM + events_table e +WHERE + value_2 IN (SELECT row_number() OVER () FROM users_table WHERE user_id = e.user_id); + count +--------------------------------------------------------------------- + 94 +(1 row) + +-- correlated subquery with group by +SELECT + count(*) +FROM + events_table e +WHERE + value_3 IN (SELECT min(value_3) FROM users_table WHERE user_id = e.user_id GROUP BY value_2); + count +--------------------------------------------------------------------- + 72 +(1 row) + +SELECT + count(*) +FROM + events_table e +WHERE + value_3 IN (SELECT min(value_3) FROM users_table WHERE user_id = e.user_id GROUP BY value_2); + count +--------------------------------------------------------------------- + 72 +(1 row) + +-- correlated subquery with group by +SELECT + count(*) +FROM + events_table e +WHERE + value_3 IN (SELECT min(value_3) v FROM users_table WHERE user_id = e.user_id GROUP BY e.value_2); + count +--------------------------------------------------------------------- + 10 +(1 row) + +-- correlated subquery with having +SELECT + count(*) +FROM + events_table e +WHERE + value_3 IN (SELECT min(value_3) v FROM users_table WHERE user_id = e.user_id GROUP BY e.value_2 HAVING min(value_3) > (SELECT 1)); + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT + count(*) +FROM + events_table e +WHERE + value_3 IN (SELECT min(value_3) v FROM users_table WHERE user_id = e.user_id GROUP BY e.value_2 HAVING min(value_3) > (SELECT e.value_3)); +ERROR: Subqueries in HAVING cannot refer to outer query +-- nested correlated subquery +SELECT + count(*) +FROM + events_table e +WHERE + value_3 IN ( + SELECT min(r.value_3) v FROM users_reference_table r JOIN (SELECT * FROM users_table WHERE user_id = e.user_id) u USING (user_id) + WHERE u.value_2 > 3 + GROUP BY e.value_2 HAVING min(r.value_3) > e.value_3); + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- not co-located correlated subquery +SELECT + count(*) +FROM + events_table e +WHERE + value_3 IN ( + SELECT min(r.value_3) v FROM users_reference_table r JOIN (SELECT * FROM users_table WHERE value_2 = e.user_id) u USING (user_id) + WHERE u.value_2 > 3 + GROUP BY e.value_2 HAVING min(r.value_3) > e.value_3); +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +-- cartesian correlated subquery +SELECT + count(*) +FROM + events_table e +WHERE + value_3 IN ( + SELECT min(r.value_3) v FROM users_reference_table r JOIN users_table u USING (user_id) + WHERE u.value_2 > 3 + GROUP BY e.value_2 HAVING min(r.value_3) > e.value_3); +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +-- even more subtle cartesian correlated subquery +SELECT + count(*) +FROM + events_table e +WHERE + value_3 IN ( + SELECT min(r.value_3) v FROM users_reference_table r JOIN users_table u USING (user_id) + WHERE u.value_2 > 3 + GROUP BY u.value_2 HAVING min(r.value_3) > e.value_3); +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +-- not a correlated subquery, uses recursive planning +SELECT + count(*) +FROM + events_table e +WHERE + value_3 IN ( + SELECT min(r.value_3) v FROM users_reference_table r JOIN users_table u USING (user_id) + WHERE u.value_2 > 3 + GROUP BY r.value_2 HAVING min(r.value_3) > 0); +DEBUG: generating subplan XXX_1 for subquery SELECT min(r.value_3) AS v FROM (public.users_reference_table r JOIN public.users_table u USING (user_id)) WHERE (u.value_2 OPERATOR(pg_catalog.>) 3) GROUP BY r.value_2 HAVING (min(r.value_3) OPERATOR(pg_catalog.>) (0)::double precision) +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.events_table e WHERE (value_3 OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.v FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(v double precision))) + count +--------------------------------------------------------------------- + 24 +(1 row) + +-- two levels of correlation should also allow +-- merge step in the subquery +SELECT sum(value_1) +FROM users_table u +WHERE EXISTS + (SELECT 1 + FROM events_table e + WHERE u.user_id = e.user_id AND + EXISTS + (SELECT 1 + FROM users_table u2 + WHERE u2.user_id = u.user_id AND u2.value_1 = 5 + LIMIT 1)); + sum +--------------------------------------------------------------------- + 216 +(1 row) + +-- correlated subquery in WHERE, with a slightly +-- different syntax that the result of the subquery +-- is compared with a constant +SELECT sum(value_1) +FROM users_table u1 +WHERE (SELECT COUNT(DISTINCT e1.value_2) + FROM events_table e1 + WHERE e1.user_id = u1.user_id + ) > 115; + sum +--------------------------------------------------------------------- + +(1 row) + +-- a correlated subquery which requires merge step +-- can be pushed down on UPDATE/DELETE queries as well +-- rollback to keep the rest of the tests unchanged +BEGIN; +UPDATE users_table u1 + SET value_1 = (SELECT count(DISTINCT value_2) + FROM events_table e1 + WHERE e1.user_id = u1.user_id); +DELETE FROM users_table u1 WHERE (SELECT count(DISTINCT value_2) + FROM events_table e1 + WHERE e1.user_id = u1.user_id) > 10; +ROLLBACK; +-- a correlated anti-join can also be pushed down even if the subquery +-- has a LIMIT +SELECT avg(value_1) +FROM users_table u +WHERE NOT EXISTS + (SELECT 'XXX' + FROM events_table e + WHERE u.user_id = e.user_id and e.value_2 > 10000 LIMIT 1); + avg +--------------------------------------------------------------------- + 2.5544554455445545 +(1 row) + +-- a [correlated] lateral join can also be pushed down even if the subquery +-- has an aggregate wout a GROUP BY +SELECT + max(min_of_val_2), max(u1.value_1) +FROM + users_table u1 + LEFT JOIN LATERAL + (SELECT min(e1.value_2) as min_of_val_2 FROM events_table e1 WHERE e1.user_id = u1.user_id) as foo ON (true); + max | max +--------------------------------------------------------------------- + 1 | 5 +(1 row) + +-- a self join is followed by a correlated subquery +EXPLAIN (COSTS OFF) +SELECT + * +FROM + users_table u1 JOIN users_table u2 USING (user_id) +WHERE + u1.value_1 < u2.value_1 AND + (SELECT + count(*) + FROM + events_table e1 + WHERE + e1.user_id = u2.user_id) > 10; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (Citus Adaptive) + Task Count: 4 + Tasks Shown: One of 4 + -> Task + Node: host=localhost port=xxxxx dbname=regression + -> Hash Join + Hash Cond: (u2.user_id = u1.user_id) + Join Filter: (u1.value_1 < u2.value_1) + -> Seq Scan on users_table_1400256 u2 + Filter: ((SubPlan 1) > 10) + SubPlan 1 + -> Aggregate + -> Seq Scan on events_table_1400260 e1 + Filter: (user_id = u2.user_id) + -> Hash + -> Seq Scan on users_table_1400256 u1 +(16 rows) + +-- when the colocated join of the FROM clause +-- entries happen on WHERE clause, Citus cannot +-- pushdown +-- Likely that the colocation checks should be +-- improved +SELECT + u1.user_id, u2.user_id +FROM + users_table u1, users_table u2 +WHERE + u1.value_1 < u2.value_1 AND + (SELECT + count(*) + FROM + events_table e1 + WHERE + e1.user_id = u2.user_id AND + u1.user_id = u2.user_id) > 10 +ORDER BY 1,2; +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +-- create a view that contains correlated subquery +CREATE TEMPORARY VIEW correlated_subquery_view AS + SELECT u1.user_id + FROM users_table u1 + WHERE (SELECT COUNT(DISTINCT e1.value_2) + FROM events_table e1 + WHERE e1.user_id = u1.user_id + ) > 0; +WARNING: "view correlated_subquery_view" has dependency on unsupported object "schema pg_temp_xxx" +DETAIL: "view correlated_subquery_view" will be created only locally +SELECT sum(user_id) FROM correlated_subquery_view; + sum +--------------------------------------------------------------------- + 376 +(1 row) + +-- now, join the view with another correlated subquery +SELECT + sum(mx) +FROM + correlated_subquery_view + LEFT JOIN LATERAL + (SELECT max(value_2) as mx FROM events_table WHERE correlated_subquery_view.user_id = events_table.user_id) as foo ON (true); + sum +--------------------------------------------------------------------- + 459 +(1 row) + +-- as an edge case, JOIN is on false +SELECT + sum(mx) +FROM + correlated_subquery_view + LEFT JOIN LATERAL + (SELECT max(value_2) as mx FROM events_table WHERE correlated_subquery_view.user_id = events_table.user_id) as foo ON (false); + sum +--------------------------------------------------------------------- + +(1 row) + +SELECT sum(value_1) +FROM users_table u1 +WHERE (SELECT COUNT(DISTINCT e1.value_2) + FROM events_table e1 + WHERE e1.user_id = u1.user_id AND false + ) > 115; +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +SELECT sum(value_1) +FROM users_table u1 +WHERE (SELECT COUNT(DISTINCT e1.value_2) + FROM events_table e1 + WHERE e1.user_id = u1.user_id + ) > 115 AND false; + sum +--------------------------------------------------------------------- + +(1 row) + +-- Test redundant WHERE clause (fix #7782, #7783) +CREATE TABLE t0 (vkey int4, pkey int4, c0 timestamp); +CREATE TABLE t1 (vkey int4, pkey int4, c4 timestamp, c5 text, c6 text); +CREATE TABLE t3 (vkey int4, pkey int4, c9 timestamp); +CREATE TABLE t7 (vkey int4, pkey int4); +-- DEBUG messages not needed for these tests +SET client_min_messages TO DEFAULT; +INSERT INTO t0 (vkey, pkey, c0) values +(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0)); +INSERT INTO t7 (vkey, pkey) values +(3, 59525); +SELECT create_reference_table('t1'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +SELECT create_distributed_table('t3', 'c9'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +UPDATE t0 set vkey = 117 +where (((t0.pkey) in (select t7.vkey from t7 where false + union all + select t3.pkey from t3 where false + ))) + or TRUE; +-- Local table t0 is updated +SELECT vkey, pkey, c0 FROM t0; + vkey | pkey | c0 +--------------------------------------------------------------------- + 117 | 13000 | Sat Sep 04 13:38:00 2032 +(1 row) + +-- MERGE command with redundant join can be planned locally +EXPLAIN (costs off, timing off) +MERGE INTO t0 USING t7 ON + (((t0.pkey) in (select t7.vkey from t7 where false + union all + select t1.pkey from t1 where false + ))) + or TRUE +WHEN MATCHED THEN + UPDATE SET vkey = 113; + QUERY PLAN +--------------------------------------------------------------------- + Merge on t0 + -> Nested Loop + -> Seq Scan on t7 + -> Materialize + -> Seq Scan on t0 +(5 rows) + +-- UPDATE via MERGE with redundant join clause: +MERGE INTO t0 USING t7 ON + (((t0.pkey) in (select t7.vkey from t7 where false + union all + select t1.pkey from t1 where false + ))) + or TRUE +WHEN MATCHED THEN + UPDATE SET vkey = 113; +-- Local table t0 is updated +SELECT vkey, pkey, c0 FROM t0; + vkey | pkey | c0 +--------------------------------------------------------------------- + 113 | 13000 | Sat Sep 04 13:38:00 2032 +(1 row) + +DELETE FROM t0 +where TRUE or (((t0.vkey) >= (select + pg_catalog.regexp_count(ref_0.c5, ref_0.c6) + from t1 as ref_0 where true))); +-- Local table t0 is now empty (0 rows) +SELECT vkey, pkey, c0 FROM t0; + vkey | pkey | c0 +--------------------------------------------------------------------- +(0 rows) + +INSERT INTO t3 (vkey, pkey, c9) values +(3, 13000, make_timestamp(2032, 9, 4, 13, 38, 0)); +-- Distributed table update with redundant WHERE +UPDATE t3 set vkey = 117 +where (((t3.pkey) in (select t1.vkey from t1 where false + union all + select t0.pkey from t0 join t7 on t0.pkey=t7.vkey where false + ))) + or TRUE; +SELECT vkey, pkey FROM t3; + vkey | pkey +--------------------------------------------------------------------- + 117 | 13000 +(1 row) + +-- Distributed table delete with redundant WHERE +DELETE FROM t3 +where TRUE or (((t3.vkey) >= (select + pg_catalog.regexp_count(ref_0.c5, ref_0.c6) + from t1 as ref_0 where true)) and (select max(vkey) from t0) > 0); +-- Distributed table t3 is now empty +SELECT vkey, pkey FROM t3; + vkey | pkey +--------------------------------------------------------------------- +(0 rows) + +-- Test case where citus table is reduced to a distributed subplan +-- Exposed by issue +INSERT INTO t0 (vkey, pkey, c0) values +(1, 10000, make_timestamp(2032, 9, 4, 13, 38, 0)), +(2, 11000, make_timestamp(2024, 8, 31, 17, 51, 0)), +(3, 12000, make_timestamp(2028, 4, 1, 3, 32, 0)), +(4, 13000, make_timestamp(2029, 11, 4, 13, 49, 0)), +(5, 14000, make_timestamp(2031, 3, 29, 18, 17, 0)), +(6, 15000, make_timestamp(2030, 5, 17, 11, 32, 0)), +(7, 16000, make_timestamp(2027, 9, 22, 12, 58, 0)), +(8, 17000, make_timestamp(2026, 12, 3, 13, 44, 0)), +(9, 18000, make_timestamp(2028, 2, 24, 14, 05, 0)), +(10,19000, make_timestamp(2031, 4, 14, 15, 12, 0)); +INSERT INTO t3 (vkey, pkey, c9) values +(1, 10000, make_timestamp(2032, 9, 4, 13, 38, 0)), +(2, 11000, make_timestamp(2024, 8, 31, 17, 51, 0)), +(3, 12000, make_timestamp(2028, 4, 1, 3, 32, 0)), +(4, 13000, make_timestamp(2029, 11, 4, 13, 49, 0)), +(5, 14000, make_timestamp(2031, 3, 29, 18, 17, 0)), +(6, 15000, make_timestamp(2030, 5, 17, 11, 32, 0)), +(7, 16000, make_timestamp(2027, 9, 22, 12, 58, 0)), +(8, 17000, make_timestamp(2026, 12, 3, 13, 44, 0)), +(9, 18000, make_timestamp(2028, 2, 24, 14, 05, 0)), +(10,19000, make_timestamp(2031, 4, 14, 15, 12, 0)); +-- minimal repro: without the fix, c_1 column is NULL +SELECT c_1, c_2 +FROM (SELECT + (SELECT c9 FROM t3 ORDER BY vkey limit 1 OFFSET 4) AS c_1, + ref_0.vkey as c_2 + FROM t0 AS ref_0 + WHERE true::bool +) as subq_0 +WHERE subq_0.c_2 = 7 +ORDER BY c_1, c_2; + c_1 | c_2 +--------------------------------------------------------------------- + Sat Mar 29 18:17:00 2031 | 7 +(1 row) + +-- remove redundant WHERE clause => same result set +SELECT c_1, c_2 +FROM (SELECT + (SELECT c9 FROM t3 ORDER BY vkey limit 1 OFFSET 4) AS c_1, + ref_0.vkey as c_2 + FROM t0 AS ref_0 +) as subq_0 +WHERE subq_0.c_2 = 7 +ORDER BY c_1, c_2; + c_1 | c_2 +--------------------------------------------------------------------- + Sat Mar 29 18:17:00 2031 | 7 +(1 row) + +-- Repro query from issue #8313 +SELECT c_0, c_1, c_2, c_7 +FROM (SELECT (SELECT c0 FROM t0 ORDER BY vkey LIMIT 1 OFFSET 2) AS c_0, + (SELECT c9 FROM t3 ORDER BY vkey limit 1 OFFSET 4) AS c_1, + ref_0.vkey as c_2, + ref_0.vkey as c_3, + (SELECT pg_catalog.min(vkey) FROM t0) AS c_7 + FROM t0 AS ref_0 + WHERE true::bool + ORDER BY c_0 DESC, c_1 DESC, c_2 ASC, c_3 ASC, c_7 ASC +) as subq_0 +where (((select vkey from t3 order by vkey limit 1 offset 6) + ) between ((subq_0.c_7)) and (subq_0.c_3)) +order by c_0, c_1, c_2; + c_0 | c_1 | c_2 | c_7 +--------------------------------------------------------------------- + Sat Apr 01 03:32:00 2028 | Sat Mar 29 18:17:00 2031 | 7 | 1 + Sat Apr 01 03:32:00 2028 | Sat Mar 29 18:17:00 2031 | 8 | 1 + Sat Apr 01 03:32:00 2028 | Sat Mar 29 18:17:00 2031 | 9 | 1 + Sat Apr 01 03:32:00 2028 | Sat Mar 29 18:17:00 2031 | 10 | 1 +(4 rows) + +-- Variant of redundant WHERE clause +SELECT c_0, c_1, c_2, c_7 +FROM (SELECT (SELECT c0 FROM t0 ORDER BY vkey LIMIT 1 OFFSET 2) AS c_0, + (SELECT c9 FROM t3 ORDER BY vkey limit 1 OFFSET 4) AS c_1, + ref_0.vkey as c_2, + ref_0.vkey as c_3, + (SELECT pg_catalog.min(vkey) FROM t0) AS c_7 + FROM t0 AS ref_0 + WHERE true::bool or (ref_0.vkey % 3 = 0) + ORDER BY c_0 DESC, c_1 DESC, c_2 ASC, c_3 ASC, c_7 ASC +) as subq_0 +where (((select vkey from t3 order by vkey limit 1 offset 6) + ) between ((subq_0.c_7)) and (subq_0.c_3)) +order by c_0, c_1, c_2; + c_0 | c_1 | c_2 | c_7 +--------------------------------------------------------------------- + Sat Apr 01 03:32:00 2028 | Sat Mar 29 18:17:00 2031 | 7 | 1 + Sat Apr 01 03:32:00 2028 | Sat Mar 29 18:17:00 2031 | 8 | 1 + Sat Apr 01 03:32:00 2028 | Sat Mar 29 18:17:00 2031 | 9 | 1 + Sat Apr 01 03:32:00 2028 | Sat Mar 29 18:17:00 2031 | 10 | 1 +(4 rows) + +-- Remove redundant WHERE clause => same result set +SELECT c_0, c_1, c_2, c_7 +FROM (SELECT (SELECT c0 FROM t0 ORDER BY vkey LIMIT 1 OFFSET 2) AS c_0, + (SELECT c9 FROM t3 ORDER BY vkey limit 1 OFFSET 4) AS c_1, + ref_0.vkey as c_2, + ref_0.vkey as c_3, + (SELECT pg_catalog.min(vkey) FROM t0) AS c_7 + FROM t0 AS ref_0 + ORDER BY c_0 DESC, c_1 DESC, c_2 ASC, c_3 ASC, c_7 ASC +) as subq_0 +where (((select vkey from t3 order by vkey limit 1 offset 6) + ) between ((subq_0.c_7)) and (subq_0.c_3)) +order by c_0, c_1, c_2; + c_0 | c_1 | c_2 | c_7 +--------------------------------------------------------------------- + Sat Apr 01 03:32:00 2028 | Sat Mar 29 18:17:00 2031 | 7 | 1 + Sat Apr 01 03:32:00 2028 | Sat Mar 29 18:17:00 2031 | 8 | 1 + Sat Apr 01 03:32:00 2028 | Sat Mar 29 18:17:00 2031 | 9 | 1 + Sat Apr 01 03:32:00 2028 | Sat Mar 29 18:17:00 2031 | 10 | 1 +(4 rows) + +-- Redundant WHERE clause with distributed parititioned table +CREATE TABLE a (a int); +INSERT INTO a VALUES (1); +-- populated distributed partitioned table +create table partitioned_table (a INT UNIQUE) PARTITION BY RANGE(a); +CREATE TABLE par_1 PARTITION OF partitioned_table FOR VALUES FROM (1) TO (41); +CREATE TABLE par_2 PARTITION OF partitioned_table FOR VALUES FROM (41) TO (81); +CREATE TABLE par_3 PARTITION OF partitioned_table FOR VALUES FROM (81) TO (121); +CREATE TABLE par_4 PARTITION OF partitioned_table FOR VALUES FROM (121) TO (161); +SELECT create_distributed_table('partitioned_table', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +insert into partitioned_table(a) select i from generate_series(1,160) i; +-- test citus table in init plan +-- with redundant WHERE clause +SELECT CASE WHEN EXISTS ( + SELECT * FROM partitioned_table + ) THEN 1 ELSE 0 END AS table_non_empty +FROM a +WHERE true; + table_non_empty +--------------------------------------------------------------------- + 1 +(1 row) + +-- test citus table in init plan +-- with redundant WHERE clause involving +-- a citus table +SELECT CASE WHEN EXISTS ( + SELECT * FROM partitioned_table + ) THEN 1 ELSE 0 END AS table_non_empty +FROM a +WHERE true OR NOT EXISTS (SELECT 1 FROM t1); + table_non_empty +--------------------------------------------------------------------- + 1 +(1 row) + +-- Test crash fix for issue #8548 +-- A query with LEFT JOIN to a distributed table and correlated subqueries +-- could crash because PostgreSQL's setrefs.c sets subplan list entries to +-- NULL when AlternativeSubPlan resolution discards unused alternatives. +-- PlanContainsDistributedSubPlanRTE must skip NULL entries. +CREATE TABLE t4 (vkey integer, pkey integer, c30 integer, c31 integer, c32 text); +CREATE TABLE t5 (vkey integer, pkey integer, c33 text, c34 integer, c35 integer, + c36 timestamp without time zone); +CREATE TABLE t2 (vkey integer, pkey integer, c15 numeric, c16 timestamp without time zone, + c17 text, c18 text, c19 timestamp without time zone, + c20 timestamp without time zone, c21 integer); +CREATE TABLE t22 (vkey integer, pkey integer, c37 numeric, c38 text, c39 numeric, + c40 numeric, c41 numeric, c42 integer, + c43 timestamp without time zone, c44 numeric, + colocated_key numeric); +SELECT create_distributed_table('t22', 'colocated_key'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- This query should not crash (issue #8548) +SELECT + 70 AS c_0 +FROM + ( + SELECT + ( + EXISTS ( + SELECT ref_5.c33 AS c_0 + FROM t5 AS ref_5 + WHERE (make_timestamp(2001, 7, 13, 17, 53, 31)) = (ref_1.c43) + ) + ) AS c_0 + FROM + ( + t4 AS ref_0 + LEFT OUTER JOIN t22 AS ref_1 + ON (ref_0.vkey = ref_1.vkey) + ) + WHERE + ( + (ref_0.c31) >= ( + SELECT ref_1.pkey AS c_0 + FROM t2 AS ref_2 + WHERE (true) < ((ref_2.c17) ^@ (ref_0.c32)) + ORDER BY c_0 DESC + LIMIT 1 + ) + ) IN ( + SELECT (ref_1.c40) <= (ref_1.c37) AS c_0 + FROM t7 AS ref_4 + WHERE NOT ((ref_1.c40) <> (ref_1.c41)) + ) + ) AS subq_0 +WHERE + (TRUE) < (TRUE); +ERROR: Distributed queries with outer joins and pseudoconstant quals are not supported in PG16. +DETAIL: PG16 disallows replacing joins with scans when the query has pseudoconstant quals +HINT: Consider upgrading your PG version to PG17+ +DROP TABLE t4; +DROP TABLE t5; +DROP TABLE t2; +DROP TABLE t22; +DROP TABLE local_table; +DROP TABLE t0; +DROP TABLE t1; +DROP TABLE t3; +DROP TABLE t7; +DROP TABLE a; +DROP TABLE partitioned_table CASCADE; +DROP SCHEMA subquery_in_where CASCADE; +SET search_path TO public; diff --git a/src/test/regress/sql/subquery_in_where.sql b/src/test/regress/sql/subquery_in_where.sql index 6fb634fc768..b51e1f90f1f 100644 --- a/src/test/regress/sql/subquery_in_where.sql +++ b/src/test/regress/sql/subquery_in_where.sql @@ -1053,6 +1053,65 @@ SELECT CASE WHEN EXISTS ( FROM a WHERE true OR NOT EXISTS (SELECT 1 FROM t1); +-- Test crash fix for issue #8548 +-- A query with LEFT JOIN to a distributed table and correlated subqueries +-- could crash because PostgreSQL's setrefs.c sets subplan list entries to +-- NULL when AlternativeSubPlan resolution discards unused alternatives. +-- PlanContainsDistributedSubPlanRTE must skip NULL entries. +CREATE TABLE t4 (vkey integer, pkey integer, c30 integer, c31 integer, c32 text); +CREATE TABLE t5 (vkey integer, pkey integer, c33 text, c34 integer, c35 integer, + c36 timestamp without time zone); +CREATE TABLE t2 (vkey integer, pkey integer, c15 numeric, c16 timestamp without time zone, + c17 text, c18 text, c19 timestamp without time zone, + c20 timestamp without time zone, c21 integer); +CREATE TABLE t22 (vkey integer, pkey integer, c37 numeric, c38 text, c39 numeric, + c40 numeric, c41 numeric, c42 integer, + c43 timestamp without time zone, c44 numeric, + colocated_key numeric); +SELECT create_distributed_table('t22', 'colocated_key'); + +-- This query should not crash (issue #8548) +SELECT + 70 AS c_0 +FROM + ( + SELECT + ( + EXISTS ( + SELECT ref_5.c33 AS c_0 + FROM t5 AS ref_5 + WHERE (make_timestamp(2001, 7, 13, 17, 53, 31)) = (ref_1.c43) + ) + ) AS c_0 + FROM + ( + t4 AS ref_0 + LEFT OUTER JOIN t22 AS ref_1 + ON (ref_0.vkey = ref_1.vkey) + ) + WHERE + ( + (ref_0.c31) >= ( + SELECT ref_1.pkey AS c_0 + FROM t2 AS ref_2 + WHERE (true) < ((ref_2.c17) ^@ (ref_0.c32)) + ORDER BY c_0 DESC + LIMIT 1 + ) + ) IN ( + SELECT (ref_1.c40) <= (ref_1.c37) AS c_0 + FROM t7 AS ref_4 + WHERE NOT ((ref_1.c40) <> (ref_1.c41)) + ) + ) AS subq_0 +WHERE + (TRUE) < (TRUE); + +DROP TABLE t4; +DROP TABLE t5; +DROP TABLE t2; +DROP TABLE t22; + DROP TABLE local_table; DROP TABLE t0; DROP TABLE t1;