Skip to content

Commit 12d3e86

Browse files
author
Saurabh Badenkal
committed
Adjust guardrails: remove TOP injection (server enforces 5000 cap), add SELECT * + JOIN warning
Live-tested against Aurora VM (aurorabapenv71aff.crm10.dynamics.com): - Server auto-caps at 5000 rows without TOP -> no client-side injection needed - Server blocks SELECT * on both single-table and JOIN queries -> SDK expansion confirmed needed - Added warning when SELECT * used with JOIN (expansion only includes first table columns) 732 tests passing.
1 parent 7f06533 commit 12d3e86

File tree

2 files changed

+97
-101
lines changed

2 files changed

+97
-101
lines changed

src/PowerPlatform/Dataverse/data/_odata.py

Lines changed: 27 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -838,14 +838,12 @@ def _do_request(url: str, *, params: Optional[Dict[str, Any]] = None) -> Dict[st
838838
r"^\s*(?:INSERT|UPDATE|DELETE|DROP|TRUNCATE|ALTER|CREATE|EXEC|GRANT|REVOKE|BULK)\b",
839839
re.IGNORECASE,
840840
)
841-
_SQL_HAS_TOP_RE = re.compile(r"\bTOP\s+\d+", re.IGNORECASE)
842-
_SQL_HAS_OFFSET_RE = re.compile(r"\bOFFSET\s+\d+\s+ROWS\b", re.IGNORECASE)
843841
_SQL_LEADING_WILDCARD_RE = re.compile(r"\bLIKE\s+'%[^']", re.IGNORECASE)
844842
_SQL_IMPLICIT_CROSS_JOIN_RE = re.compile(
845843
r"\bFROM\s+[A-Za-z0-9_]+\s+[A-Za-z0-9_]+\s*,\s*[A-Za-z0-9_]+",
846844
re.IGNORECASE,
847845
)
848-
_SQL_DEFAULT_TOP = 5000
846+
_SQL_HAS_JOIN_RE = re.compile(r"\bJOIN\b", re.IGNORECASE)
849847

850848
def _expand_select_star(self, sql: str, table: str) -> str:
851849
"""Replace ``SELECT *`` with explicit column names.
@@ -854,10 +852,26 @@ def _expand_select_star(self, sql: str, table: str) -> str:
854852
an error ("SELECT * is not supported"). This helper resolves all
855853
columns via ``_list_columns`` and rewrites the query so the user
856854
never has to know the server limitation.
855+
856+
For JOIN queries, the expansion only includes columns from the first
857+
(FROM) table. A warning is emitted so the user knows to specify
858+
columns explicitly for multi-table queries.
857859
"""
858860
if not self._SELECT_STAR_RE.search(sql):
859861
return sql
860862

863+
# Warn on SELECT * with JOINs -- expansion uses only the FROM table
864+
if self._SQL_HAS_JOIN_RE.search(sql):
865+
warnings.warn(
866+
"SELECT * with JOIN: the SDK expands * using columns from "
867+
"the first table only. Columns from joined tables will not "
868+
"be included. Specify columns explicitly for JOINs "
869+
"(e.g. SELECT a.name, c.fullname FROM account a "
870+
"JOIN contact c ON ...).",
871+
UserWarning,
872+
stacklevel=4,
873+
)
874+
861875
cols = self._list_columns(
862876
table,
863877
select=["LogicalName"],
@@ -877,15 +891,17 @@ def _sql_guardrails(self, sql: str) -> str:
877891
1. **Block write statements** -- ``INSERT``, ``UPDATE``, ``DELETE``,
878892
``DROP``, ``TRUNCATE``, ``ALTER``, ``CREATE``, ``EXEC``, ``GRANT``,
879893
``REVOKE``, ``BULK`` are rejected with ``ValidationError``.
880-
2. **Auto-inject TOP** -- if the query has no ``TOP`` or ``OFFSET``
881-
clause, ``TOP 5000`` is injected after ``SELECT`` (or after
882-
``DISTINCT``) and a ``UserWarning`` is emitted so the caller
883-
knows the result set was capped.
884-
3. **Warn on leading-wildcard LIKE** -- ``LIKE '%...'`` patterns
894+
2. **Warn on leading-wildcard LIKE** -- ``LIKE '%...'`` patterns
885895
force full table scans and hurt shared database performance.
886-
4. **Warn on implicit cross joins** -- ``FROM a, b`` (comma syntax)
896+
3. **Warn on implicit cross joins** -- ``FROM a, b`` (comma syntax)
887897
produces cartesian products.
888898
899+
.. note::
900+
The server enforces a 5000-row maximum per query and blocks
901+
``SELECT *`` directly. The SDK handles ``SELECT *`` via
902+
``_expand_select_star``. No client-side TOP injection is needed
903+
because the server already caps results.
904+
889905
:param sql: The SQL string (already stripped).
890906
:return: Possibly-rewritten SQL string.
891907
:raises ValidationError: If the SQL contains a write statement.
@@ -899,31 +915,7 @@ def _sql_guardrails(self, sql: str) -> str:
899915
subcode=VALIDATION_SQL_WRITE_BLOCKED,
900916
)
901917

902-
# 2. Auto-inject TOP if missing
903-
if not self._SQL_HAS_TOP_RE.search(sql) and not self._SQL_HAS_OFFSET_RE.search(sql):
904-
# Inject TOP 5000 after SELECT [DISTINCT]
905-
def _inject_top(m):
906-
return f"{m.group(0)}TOP {self._SQL_DEFAULT_TOP} "
907-
908-
sql_new = re.sub(
909-
r"\bSELECT(\s+DISTINCT)?\s",
910-
_inject_top,
911-
sql,
912-
count=1,
913-
flags=re.IGNORECASE,
914-
)
915-
if sql_new != sql:
916-
warnings.warn(
917-
f"Query has no TOP or OFFSET clause. "
918-
f"SDK auto-injected TOP {self._SQL_DEFAULT_TOP} to prevent "
919-
f"unbounded result sets. Add an explicit TOP or "
920-
f"OFFSET...FETCH to suppress this warning.",
921-
UserWarning,
922-
stacklevel=4,
923-
)
924-
sql = sql_new
925-
926-
# 3. Warn on leading-wildcard LIKE
918+
# 2. Warn on leading-wildcard LIKE
927919
if self._SQL_LEADING_WILDCARD_RE.search(sql):
928920
warnings.warn(
929921
"Query contains a leading-wildcard LIKE pattern "
@@ -934,7 +926,7 @@ def _inject_top(m):
934926
stacklevel=4,
935927
)
936928

937-
# 4. Warn on implicit cross joins
929+
# 3. Warn on implicit cross joins
938930
if self._SQL_IMPLICIT_CROSS_JOIN_RE.search(sql):
939931
warnings.warn(
940932
"Query appears to use an implicit cross join "

tests/unit/data/test_sql_guardrails.py

Lines changed: 70 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -81,78 +81,37 @@ def test_select_with_delete_in_where_not_blocked(self):
8181

8282

8383
# ===================================================================
84-
# 2. Auto-inject TOP when missing
84+
# 2. Server enforces TOP 5000 (no client-side injection needed)
8585
# ===================================================================
8686

8787

88-
class TestAutoInjectTop:
89-
"""Queries without TOP or OFFSET should get TOP 5000 injected."""
88+
class TestNoTopInjection:
89+
"""Verify the SDK does NOT inject TOP -- server handles the 5000 cap."""
9090

91-
def test_no_top_injects_top_5000(self):
91+
def test_no_top_passes_through_unchanged(self):
9292
c = _client()
93-
with warnings.catch_warnings(record=True) as w:
94-
warnings.simplefilter("always")
95-
result = c._sql_guardrails("SELECT name FROM account")
96-
assert len(w) >= 1
97-
assert "TOP 5000" in str(w[0].message)
98-
assert "TOP 5000" in result
93+
sql = "SELECT name FROM account"
94+
result = c._sql_guardrails(sql)
95+
assert result == sql
96+
assert "TOP" not in result
9997

10098
def test_existing_top_not_modified(self):
10199
c = _client()
102-
with warnings.catch_warnings(record=True) as w:
103-
warnings.simplefilter("always")
104-
result = c._sql_guardrails("SELECT TOP 100 name FROM account")
105-
top_warnings = [x for x in w if "TOP" in str(x.message)]
106-
assert len(top_warnings) == 0
100+
result = c._sql_guardrails("SELECT TOP 100 name FROM account")
107101
assert "TOP 100" in result
108-
assert "TOP 5000" not in result
109-
110-
def test_existing_offset_not_modified(self):
111-
c = _client()
112-
with warnings.catch_warnings(record=True) as w:
113-
warnings.simplefilter("always")
114-
result = c._sql_guardrails(
115-
"SELECT name FROM account ORDER BY name " "OFFSET 10 ROWS FETCH NEXT 5 ROWS ONLY"
116-
)
117-
top_warnings = [x for x in w if "TOP" in str(x.message)]
118-
assert len(top_warnings) == 0
119-
assert "TOP 5000" not in result
120102

121-
def test_distinct_gets_top_after_select_distinct(self):
103+
def test_offset_passes_through(self):
122104
c = _client()
123-
with warnings.catch_warnings(record=True):
124-
warnings.simplefilter("always")
125-
result = c._sql_guardrails("SELECT DISTINCT name FROM account")
126-
assert "SELECT DISTINCT TOP 5000" in result or "SELECT DISTINCT TOP 5000" in result.upper()
105+
sql = "SELECT name FROM account ORDER BY name OFFSET 10 ROWS FETCH NEXT 5 ROWS ONLY"
106+
result = c._sql_guardrails(sql)
107+
assert result == sql
127108

128-
def test_count_star_gets_top(self):
109+
def test_join_without_top_not_modified(self):
129110
c = _client()
130-
with warnings.catch_warnings(record=True) as w:
131-
warnings.simplefilter("always")
132-
result = c._sql_guardrails("SELECT COUNT(*) FROM account")
133-
assert any("TOP 5000" in str(x.message) for x in w)
134-
assert "TOP 5000" in result
135-
136-
def test_join_without_top_gets_top(self):
137-
c = _client()
138-
with warnings.catch_warnings(record=True) as w:
139-
warnings.simplefilter("always")
140-
result = c._sql_guardrails(
141-
"SELECT a.name, c.fullname FROM account a " "JOIN contact c ON a.accountid = c.parentcustomerid"
142-
)
143-
assert any("TOP 5000" in str(x.message) for x in w)
144-
assert "TOP 5000" in result
145-
146-
def test_join_with_top_not_modified(self):
147-
c = _client()
148-
with warnings.catch_warnings(record=True) as w:
149-
warnings.simplefilter("always")
150-
result = c._sql_guardrails(
151-
"SELECT TOP 10 a.name FROM account a " "JOIN contact c ON a.accountid = c.parentcustomerid"
152-
)
153-
top_warnings = [x for x in w if "TOP" in str(x.message)]
154-
assert len(top_warnings) == 0
155-
assert "TOP 10" in result
111+
sql = "SELECT a.name, c.fullname FROM account a " "JOIN contact c ON a.accountid = c.parentcustomerid"
112+
result = c._sql_guardrails(sql)
113+
assert result == sql
114+
assert "TOP" not in result
156115

157116

158117
# ===================================================================
@@ -232,7 +191,52 @@ def test_single_table_no_warning(self):
232191

233192

234193
# ===================================================================
235-
# 5. Integration: _query_sql applies guardrails
194+
# 5. SELECT * with JOIN warning (from _expand_select_star)
195+
# ===================================================================
196+
197+
198+
class TestSelectStarJoinWarning:
199+
"""SELECT * with JOIN should warn that only first table columns are used."""
200+
201+
def test_select_star_with_join_warns(self):
202+
c = _client()
203+
c._list_columns = MagicMock(return_value=[{"LogicalName": "name"}, {"LogicalName": "accountid"}])
204+
with warnings.catch_warnings(record=True) as w:
205+
warnings.simplefilter("always")
206+
c._expand_select_star(
207+
"SELECT * FROM account a JOIN contact c ON a.accountid = c.parentcustomerid",
208+
"account",
209+
)
210+
join_warnings = [x for x in w if "JOIN" in str(x.message)]
211+
assert len(join_warnings) == 1
212+
assert "first table only" in str(join_warnings[0].message)
213+
214+
def test_select_star_no_join_no_warning(self):
215+
c = _client()
216+
c._list_columns = MagicMock(return_value=[{"LogicalName": "name"}])
217+
with warnings.catch_warnings(record=True) as w:
218+
warnings.simplefilter("always")
219+
c._expand_select_star("SELECT * FROM account", "account")
220+
join_warnings = [x for x in w if "JOIN" in str(x.message)]
221+
assert len(join_warnings) == 0
222+
223+
def test_no_star_with_join_no_warning(self):
224+
c = _client()
225+
c._list_columns = MagicMock()
226+
with warnings.catch_warnings(record=True) as w:
227+
warnings.simplefilter("always")
228+
c._expand_select_star(
229+
"SELECT a.name, c.fullname FROM account a " "JOIN contact c ON a.accountid = c.parentcustomerid",
230+
"account",
231+
)
232+
# _list_columns not called (no star), so no JOIN warning
233+
c._list_columns.assert_not_called()
234+
join_warnings = [x for x in w if "JOIN" in str(x.message)]
235+
assert len(join_warnings) == 0
236+
237+
238+
# ===================================================================
239+
# 6. Integration: _query_sql applies guardrails
236240
# ===================================================================
237241

238242

@@ -246,22 +250,23 @@ def test_write_blocked_before_server_call(self):
246250
c._query_sql("DELETE FROM account WHERE name = 'x'")
247251
c._request.assert_not_called()
248252

249-
def test_top_injected_in_server_request(self):
253+
def test_no_top_injection_in_server_request(self):
254+
"""Server manages the 5000 cap -- SDK should not inject TOP."""
250255
c = _client()
251256
c._entity_set_from_schema_name = MagicMock(return_value="accounts")
252257
mock_response = MagicMock()
253258
mock_response.json.return_value = {"value": []}
254259
mock_response.status_code = 200
255260
c._request = MagicMock(return_value=mock_response)
256261

257-
with warnings.catch_warnings(record=True):
258-
warnings.simplefilter("always")
259-
c._query_sql("SELECT name FROM account")
262+
c._query_sql("SELECT name FROM account")
260263

261264
call_args = c._request.call_args
262265
sent_params = call_args[1].get("params", {})
263266
sent_sql = sent_params.get("sql", "")
264-
assert "TOP 5000" in sent_sql
267+
# SDK should NOT inject TOP 5000
268+
assert "TOP 5000" not in sent_sql
269+
assert sent_sql == "SELECT name FROM account"
265270

266271
def test_explicit_top_preserved_in_server_request(self):
267272
c = _client()
@@ -277,4 +282,3 @@ def test_explicit_top_preserved_in_server_request(self):
277282
sent_params = call_args[1].get("params", {})
278283
sent_sql = sent_params.get("sql", "")
279284
assert "TOP 50" in sent_sql
280-
assert "TOP 5000" not in sent_sql

0 commit comments

Comments
 (0)