Skip to content

Commit 63bdd67

Browse files
author
Samson Gebre
committed
feat: implement pagination handling in SQL queries
1 parent f43577d commit 63bdd67

3 files changed

Lines changed: 396 additions & 10 deletions

File tree

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## [Unreleased]
9+
10+
### Fixed
11+
- `client.query.sql()` silently truncated results at 5,000 rows. The method now follows `@odata.nextLink` pagination and returns all matching rows (#157).
12+
813
## [0.1.0b5] - 2026-02-27
914

1015
### Fixed

src/PowerPlatform/Dataverse/data/_odata.py

Lines changed: 99 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,13 @@
1313
import re
1414
import json
1515
import uuid
16+
import warnings
1617
from datetime import datetime, timezone
1718
import importlib.resources as ir
1819
from contextlib import contextmanager
1920
from contextvars import ContextVar
2021

21-
from urllib.parse import quote as _url_quote
22+
from urllib.parse import quote as _url_quote, parse_qs, urlparse
2223

2324
from ..core._http import _HttpClient
2425
from ._upload import _FileUploadMixin
@@ -46,6 +47,34 @@
4647
_DEFAULT_EXPECTED_STATUSES: tuple[int, ...] = (200, 201, 202, 204)
4748

4849

50+
def _extract_pagingcookie(next_link: str) -> Optional[str]:
    """Extract the raw pagingcookie value from a SQL ``@odata.nextLink`` URL.

    The Dataverse SQL endpoint has a server-side bug where the pagingcookie
    (containing first/last record GUIDs) does not advance between pages even
    though ``pagenumber`` increments. Detecting a repeated cookie lets the
    pagination loop break instead of looping indefinitely.

    Extraction is best-effort: a link that cannot be parsed is treated the
    same as a link that carries no cookie.

    :param next_link: The ``@odata.nextLink`` URL returned by the server.
    :return: The pagingcookie string exactly as it appears inside the
        once-decoded ``$skiptoken`` (still percent-encoded), or ``None`` if
        the link is malformed, has no ``$skiptoken``, or has no non-empty
        ``pagingcookie`` attribute.
    """
    # Only the URL parsing can raise (ValueError for malformed URLs such as a
    # bad IPv6 netloc; TypeError/AttributeError for non-string input). Keep the
    # try body minimal so unrelated programming errors are not hidden.
    try:
        query = parse_qs(urlparse(next_link).query)
    except (ValueError, TypeError, AttributeError):
        return None

    skiptoken = query.get("$skiptoken", [None])[0]
    if not skiptoken:
        return None

    # parse_qs already URL-decodes the value once, giving the outer XML with
    # pagingcookie still percent-encoded (e.g. pagingcookie="%3ccookie...").
    # A second decode is intentionally omitted: decoding again would turn %22
    # into " inside the cookie XML, breaking the regex and causing every page
    # to extract the same truncated prefix regardless of the actual GUIDs.
    match = re.search(r'pagingcookie="([^"]+)"', skiptoken)
    return match.group(1) if match else None
76+
77+
4978
@dataclass
5079
class _RequestContext:
5180
"""Structured request context used by ``_request`` to clarify payload and metadata."""
@@ -733,15 +762,77 @@ def _query_sql(self, sql: str) -> list[dict[str, Any]]:
733762
body = r.json()
734763
except ValueError:
735764
return []
736-
if isinstance(body, dict):
737-
value = body.get("value")
738-
if isinstance(value, list):
739-
# Ensure dict rows only
740-
return [row for row in value if isinstance(row, dict)]
741-
# Fallbacks: if body itself is a list
765+
766+
# Collect first page
767+
results: list[dict[str, Any]] = []
742768
if isinstance(body, list):
743769
return [row for row in body if isinstance(row, dict)]
744-
return []
770+
if not isinstance(body, dict):
771+
return results
772+
773+
value = body.get("value")
774+
if isinstance(value, list):
775+
results = [row for row in value if isinstance(row, dict)]
776+
777+
# Follow pagination links until exhausted
778+
raw_link = body.get("@odata.nextLink") or body.get("odata.nextLink")
779+
next_link: str | None = raw_link if isinstance(raw_link, str) else None
780+
visited: set[str] = set()
781+
seen_cookies: set[str] = set()
782+
while next_link:
783+
# Guard 1: exact URL cycle (same next_link returned twice)
784+
if next_link in visited:
785+
break
786+
visited.add(next_link)
787+
# Guard 2: server-side bug where pagingcookie does not advance between
788+
# pages (pagenumber increments but cookie GUIDs stay the same), which
789+
# causes an infinite loop even though URLs differ.
790+
cookie = _extract_pagingcookie(next_link)
791+
if cookie is not None:
792+
if cookie in seen_cookies:
793+
warnings.warn(
794+
f"SQL pagination stopped after {len(results)} rows — "
795+
"the Dataverse server returned the same pagingcookie twice "
796+
"(pagenumber incremented but the paging position did not advance). "
797+
"This is a server-side bug. Returning the rows collected so far. "
798+
"To avoid pagination entirely, add a TOP clause to your query.",
799+
RuntimeWarning,
800+
stacklevel=4,
801+
)
802+
break
803+
seen_cookies.add(cookie)
804+
try:
805+
page_resp = self._request("get", next_link)
806+
except Exception as exc:
807+
warnings.warn(
808+
f"SQL pagination stopped after {len(results)} rows — "
809+
f"the next-page request failed: {exc}. "
810+
"Add a TOP clause to your query to limit results to a single page.",
811+
RuntimeWarning,
812+
stacklevel=5,
813+
)
814+
break
815+
try:
816+
page_body = page_resp.json()
817+
except ValueError as exc:
818+
warnings.warn(
819+
f"SQL pagination stopped after {len(results)} rows — "
820+
f"the next-page response was not valid JSON: {exc}. "
821+
"Add a TOP clause to your query to limit results to a single page.",
822+
RuntimeWarning,
823+
stacklevel=5,
824+
)
825+
break
826+
if not isinstance(page_body, dict):
827+
break
828+
page_value = page_body.get("value")
829+
if not isinstance(page_value, list) or not page_value:
830+
break
831+
results.extend(row for row in page_value if isinstance(row, dict))
832+
raw_link = page_body.get("@odata.nextLink") or page_body.get("odata.nextLink")
833+
next_link = raw_link if isinstance(raw_link, str) else None
834+
835+
return results
745836

746837
@staticmethod
747838
def _extract_logical_table(sql: str) -> str:

0 commit comments

Comments
 (0)