Skip to content

Commit a47f6d3

Browse files
Nguyễn Phúc Lươngclaude
andcommitted
fix(compiler): scope-aware column resolution in subqueries (#4251)
When resolving an unqualified column reference inside a subquery, sqlc's analyzer matched against every table in scope across the whole query, producing a spurious "column reference ... is ambiguous" error for queries that real PostgreSQL resolves unambiguously via lexical scope. Example from the issue: SELECT * FROM t1 WHERE id IN ( SELECT t1_id FROM t2 WHERE id = $1 ); Real Postgres binds the outer "id" to t1.id and the inner "id" to t2.id by lexical scope. sqlc was flattening every FROM-clause RangeVar in the whole query into one search list and triggering "ambiguous" on the inner id. Two small changes: * internal/compiler/find_params.go — when paramSearch.Visit enters a SelectStmt whose FROM clause has exactly one RangeVar, capture it as the current scope (paramSearch.rangeVar). The walker propagates this through the returned visitor, so ParamRefs encountered in the same SelectStmt's WHERE/GROUP/etc. inherit the inner scope. The exactly-one guard ensures we don't silently pick a winner when the FROM clause is genuinely multi-table at the same level — those cases must keep iterating every table so true ambiguity (e.g. "SELECT t1.id FROM t1, t2 WHERE id = $1") still errors. * internal/compiler/resolve.go — when resolving an unqualified column, if no alias is given but ref.rv points to an in-scope table that actually contains the column, narrow the search to that table only. If the column is absent from the inner table, fall back to the full search list so correlated-subquery references to an outer column continue to work. Tests: * internal/compiler/resolve_test.go — new unit test covering narrowToInnermostScope across nil-rv, column-in-inner-scope (narrow), column-absent (fall back), and unknown-table (fall back). * internal/endtoend/testdata/subquery_scope_4251/ — end-to-end fixture reproducing the exact query from the issue under postgresql/pgx/v5; generated code asserted against the golden files. Verified locally: * /tmp/repro4251 generate now returns EXIT=0 (was EXIT=1 with 'relation "id" ambiguous'). * SELECT t1.id FROM t1, t2 WHERE id = $1 still correctly errors with 'ambiguous'. * Correlated subquery (SELECT SUM(total) FROM orders WHERE customer_id = c.id) still resolves cleanly. * go test ./internal/compiler/... ./internal/sql/... ./internal/engine/... passes; gofmt -l and go vet clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent a3b0cfd commit a47f6d3

9 files changed

Lines changed: 277 additions & 10 deletions

File tree

internal/compiler/find_params.go

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,20 @@ func findParameters(root ast.Node) ([]paramRef, []error) {
2020
}
2121

2222
type paramRef struct {
23-
parent ast.Node
24-
rv *ast.RangeVar
25-
ref *ast.ParamRef
26-
name string // Named parameter support
23+
parent ast.Node
24+
rv *ast.RangeVar
25+
ref *ast.ParamRef
26+
name string // Named parameter support
27+
inSubquery bool // true if this ParamRef sits inside a SubLink's subselect; gates scope narrowing to subqueries only (issue #4251)
2728
}
2829

2930
type paramSearch struct {
30-
parent ast.Node
31-
rangeVar *ast.RangeVar
32-
refs *[]paramRef
33-
seen map[int]struct{}
34-
errs *[]error
31+
parent ast.Node
32+
rangeVar *ast.RangeVar
33+
refs *[]paramRef
34+
seen map[int]struct{}
35+
errs *[]error
36+
inSubquery bool // true once we have descended into a SubLink's subselect; propagates to ParamRefs encountered below it (issue #4251)
3537

3638
// XXX: Gross state hack for limit
3739
limitCount ast.Node
@@ -139,6 +141,22 @@ func (p paramSearch) Visit(node ast.Node) astutils.Visitor {
139141
case *ast.ResTarget:
140142
p.parent = node
141143

144+
case *ast.SubLink:
145+
// Issue #4251: when descending into a SubLink's subselect (e.g.
146+
// "x IN (SELECT y FROM t WHERE id = $1)"), capture the subselect's
147+
// first FROM-clause RangeVar so ParamRefs inside its WHERE/GROUP/etc.
148+
// resolve against the inner scope. Only narrow when the subselect's
149+
// FROM is unambiguous on its own (single RangeVar; no JOINs / no
150+
// multi-table FROM / no nested subselect). Scoped to SubLinks
151+
// specifically: top-level INSERT-SELECT / JOIN / etc. continue to use
152+
// the full-table search the resolver has always done.
153+
p.inSubquery = true
154+
if sel, ok := n.Subselect.(*ast.SelectStmt); ok && sel.FromClause != nil && len(sel.FromClause.Items) == 1 {
155+
if rv, ok := sel.FromClause.Items[0].(*ast.RangeVar); ok && rv != nil && rv.Relname != nil {
156+
p.rangeVar = rv
157+
}
158+
}
159+
142160
case *ast.SelectStmt:
143161
if n.LimitCount != nil {
144162
p.limitCount = n.LimitCount
@@ -186,7 +204,7 @@ func (p paramSearch) Visit(node ast.Node) astutils.Visitor {
186204
}
187205

188206
if set {
189-
*p.refs = append(*p.refs, paramRef{parent: parent, ref: n, rv: p.rangeVar})
207+
*p.refs = append(*p.refs, paramRef{parent: parent, ref: n, rv: p.rangeVar, inSubquery: p.inSubquery})
190208
p.seen[n.Location] = struct{}{}
191209
}
192210
return nil

internal/compiler/resolve.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,10 @@ func (comp *Compiler) resolveCatalogRefs(qc *QueryCatalog, rvs []*ast.RangeVar,
215215
}
216216
}
217217
}
218+
} else if ref.inSubquery {
219+
if scoped := narrowToInnermostScope(tables, typeMap, c.DefaultSchema, ref.rv, key); scoped != nil {
220+
search = scoped
221+
}
218222
}
219223

220224
var found int
@@ -581,6 +585,10 @@ func (comp *Compiler) resolveCatalogRefs(qc *QueryCatalog, rvs []*ast.RangeVar,
581585
}
582586
}
583587
}
588+
} else if ref.inSubquery {
589+
if scoped := narrowToInnermostScope(tables, typeMap, c.DefaultSchema, ref.rv, key); scoped != nil {
590+
search = scoped
591+
}
584592
}
585593

586594
for _, table := range search {
@@ -636,3 +644,48 @@ func (comp *Compiler) resolveCatalogRefs(qc *QueryCatalog, rvs []*ast.RangeVar,
636644
}
637645
return a, nil
638646
}
647+
648+
// narrowToInnermostScope returns a single-element search slice when the
649+
// parameter reference (ref.rv) points to a known table that actually
650+
// contains the column being resolved. It implements the lexical-scope rule
651+
// real PostgreSQL applies inside subqueries: an unqualified column reference
652+
// is bound to the innermost FROM-clause table that defines it. If the
653+
// innermost scope is unknown (rv nil) or the column is not present there
654+
// (correlated-subquery referring to an outer column), it returns nil so the
655+
// caller falls back to the full table list. See issue #4251.
656+
func narrowToInnermostScope(
657+
tables []*ast.TableName,
658+
typeMap map[string]map[string]map[string]*catalog.Column,
659+
defaultSchema string,
660+
rv *ast.RangeVar,
661+
column string,
662+
) []*ast.TableName {
663+
if rv == nil || rv.Relname == nil {
664+
return nil
665+
}
666+
innerName := *rv.Relname
667+
innerSchema := ""
668+
if rv.Schemaname != nil {
669+
innerSchema = *rv.Schemaname
670+
}
671+
lookupSchema := innerSchema
672+
if lookupSchema == "" {
673+
lookupSchema = defaultSchema
674+
}
675+
// Only narrow if the column actually exists in the innermost scope.
676+
// Falling back to the full search preserves correlated-subquery
677+
// behavior (e.g. an inner WHERE referring to an outer column).
678+
if _, ok := typeMap[lookupSchema][innerName][column]; !ok {
679+
return nil
680+
}
681+
for _, t := range tables {
682+
tSchema := t.Schema
683+
if tSchema == "" {
684+
tSchema = defaultSchema
685+
}
686+
if t.Name == innerName && tSchema == lookupSchema {
687+
return []*ast.TableName{t}
688+
}
689+
}
690+
return nil
691+
}

internal/compiler/resolve_test.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
package compiler
2+
3+
import (
4+
"testing"
5+
6+
"github.com/sqlc-dev/sqlc/internal/sql/ast"
7+
"github.com/sqlc-dev/sqlc/internal/sql/catalog"
8+
)
9+
10+
// TestNarrowToInnermostScope covers the lexical-scope rule used by the
11+
// resolver to disambiguate column references that live inside subqueries.
12+
// See issue #4251.
13+
func TestNarrowToInnermostScope(t *testing.T) {
14+
t.Parallel()
15+
16+
str := func(s string) *string { return &s }
17+
tables := []*ast.TableName{
18+
{Name: "t1"},
19+
{Name: "t2"},
20+
}
21+
typeMap := map[string]map[string]map[string]*catalog.Column{
22+
"public": {
23+
"t1": {"id": {Name: "id"}},
24+
"t2": {"id": {Name: "id"}, "t1_id": {Name: "t1_id"}},
25+
},
26+
}
27+
28+
t.Run("nil_rv_returns_nil", func(t *testing.T) {
29+
got := narrowToInnermostScope(tables, typeMap, "public", nil, "id")
30+
if got != nil {
31+
t.Fatalf("expected nil (no narrowing) got %v", got)
32+
}
33+
})
34+
35+
t.Run("nil_relname_returns_nil", func(t *testing.T) {
36+
got := narrowToInnermostScope(tables, typeMap, "public", &ast.RangeVar{}, "id")
37+
if got != nil {
38+
t.Fatalf("expected nil (no narrowing) got %v", got)
39+
}
40+
})
41+
42+
t.Run("column_in_inner_scope_narrows_to_inner_table", func(t *testing.T) {
43+
// The repro shape: ParamRef in inner SELECT (rv=t2) resolving column "id".
44+
// id exists in t2 -> narrow search to [t2] so the outer t1.id doesn't
45+
// trigger a spurious "ambiguous" error.
46+
rv := &ast.RangeVar{Relname: str("t2")}
47+
got := narrowToInnermostScope(tables, typeMap, "public", rv, "id")
48+
if len(got) != 1 || got[0].Name != "t2" {
49+
t.Fatalf("expected narrow to [t2], got %v", got)
50+
}
51+
})
52+
53+
t.Run("column_absent_from_inner_falls_back_to_full_scope", func(t *testing.T) {
54+
// Correlated-subquery shape: inner SELECT (rv=t2) references an outer
55+
// column not present in t2. Returning nil tells the caller to keep the
56+
// full tables list, which lets the outer-scope match win.
57+
rv := &ast.RangeVar{Relname: str("t2")}
58+
got := narrowToInnermostScope(tables, typeMap, "public", rv, "not_a_t2_column")
59+
if got != nil {
60+
t.Fatalf("expected nil (fall back to all tables), got %v", got)
61+
}
62+
})
63+
64+
t.Run("rv_points_to_unknown_table_falls_back", func(t *testing.T) {
65+
rv := &ast.RangeVar{Relname: str("nonexistent")}
66+
got := narrowToInnermostScope(tables, typeMap, "public", rv, "id")
67+
if got != nil {
68+
t.Fatalf("expected nil (fall back), got %v", got)
69+
}
70+
})
71+
}

internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/go/db.go

Lines changed: 32 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/go/models.go

Lines changed: 18 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

internal/endtoend/testdata/subquery_scope_4251/postgresql/pgx/v5/go/query.sql.go

Lines changed: 44 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
-- name: GetT1FromT2 :many
2+
-- Unqualified `id` inside the subquery must bind to t2.id (innermost
3+
-- FROM-clause scope), not be flagged as ambiguous against t1.id. See
4+
-- issue #4251.
5+
SELECT id FROM t1
6+
WHERE id IN (
7+
SELECT t1_id
8+
FROM t2
9+
WHERE id = $1
10+
);
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CREATE TABLE t1 (
2+
id UUID PRIMARY KEY
3+
);
4+
5+
CREATE TABLE t2 (
6+
id UUID,
7+
t1_id UUID REFERENCES t1(id) ON DELETE CASCADE
8+
);
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"version": "1",
3+
"packages": [
4+
{
5+
"path": "go",
6+
"engine": "postgresql",
7+
"sql_package": "pgx/v5",
8+
"name": "querytest",
9+
"schema": "schema.sql",
10+
"queries": "query.sql"
11+
}
12+
]
13+
}

0 commit comments

Comments
 (0)