Skip to content

Commit 63ec8a8

Browse files
author
Abel Milash
committed
Address PR #98 review: single DF return, rename param, add create guards
1 parent 7a6fb2a commit 63ec8a8

5 files changed

Lines changed: 124 additions & 111 deletions

File tree

.claude/skills/dataverse-sdk-use/SKILL.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,12 @@ The SDK provides DataFrame wrappers for all CRUD operations using pandas DataFrames
124124
```python
125125
import pandas as pd
126126

127-
# Query records as paged DataFrames (one DataFrame per page)
128-
for df_page in client.get_dataframe("account", filter="statecode eq 0", select=["name"]):
129-
print(f"Page has {len(df_page)} rows")
127+
# Query records — returns a single DataFrame (like pd.read_sql)
128+
df = client.get_dataframe("account", filter="statecode eq 0", select=["name"])
129+
print(f"Got {len(df)} rows")
130130

131-
# Collect all pages into one DataFrame
132-
df = pd.concat(client.get_dataframe("account", select=["name"], top=100), ignore_index=True)
131+
# Limit results with top for large tables
132+
df = client.get_dataframe("account", select=["name"], top=100)
133133

134134
# Fetch single record as one-row DataFrame
135135
df = client.get_dataframe("account", record_id=account_id, select=["name"])

examples/advanced/dataframe_operations.py

Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -71,28 +71,24 @@ def main():
7171
print(f"[OK] Created {len(new_accounts)} records")
7272
print(f" IDs: {new_accounts['accountid'].tolist()}")
7373

74-
# ── 2. Query records as paged DataFrames ──────────────────────
74+
# ── 2. Query records as a DataFrame ─────────────────────────
7575
print("\n" + "-" * 60)
76-
print("2. Query records as paged DataFrames (lazy generator)")
76+
print("2. Query records as a DataFrame")
7777
print("-" * 60)
7878

79-
page_count = 0
80-
for df_page in client.get_dataframe(table, select=select_cols, filter=test_filter, page_size=2):
81-
page_count += 1
82-
print(f" Page {page_count} ({len(df_page)} records):\n{df_page.to_string(index=False)}")
79+
df_all = client.get_dataframe(table, select=select_cols, filter=test_filter)
80+
print(f"[OK] Got {len(df_all)} records in one DataFrame")
81+
print(f" Columns: {list(df_all.columns)}")
82+
print(f"{df_all.to_string(index=False)}")
8383

84-
# ── 3. Collect all pages into one DataFrame ───────────────────
84+
# ── 3. Limit results with top ──────────────────────────────
8585
print("\n" + "-" * 60)
86-
print("3. Collect all pages into one DataFrame with pd.concat")
86+
print("3. Limit results with top")
8787
print("-" * 60)
8888

89-
all_records = pd.concat(
90-
client.get_dataframe(table, select=select_cols, filter=test_filter, page_size=2),
91-
ignore_index=True,
92-
)
93-
print(f"[OK] Got {len(all_records)} total records in one DataFrame")
94-
print(f" Columns: {list(all_records.columns)}")
95-
print(f"{all_records.to_string(index=False)}")
89+
df_top2 = client.get_dataframe(table, select=select_cols, filter=test_filter, top=2)
90+
print(f"[OK] Got {len(df_top2)} records with top=2")
91+
print(f"{df_top2.to_string(index=False)}")
9692

9793
# ── 4. Fetch a single record by ID ────────────────────────────
9894
print("\n" + "-" * 60)
@@ -114,8 +110,8 @@ def main():
114110
client.update_dataframe(table, new_accounts[["accountid", "telephone1"]], id_column="accountid")
115111
print("[OK] Updated 3 records")
116112

117-
# Verify the updates with a bulk get
118-
verified = next(client.get_dataframe(table, select=select_cols, filter=test_filter))
113+
# Verify the updates with a query
114+
verified = client.get_dataframe(table, select=select_cols, filter=test_filter)
119115
print(f" Verified:\n{verified.to_string(index=False)}")
120116

121117
# ── 6. Broadcast update (same value to all records) ───────────
@@ -130,7 +126,7 @@ def main():
130126
print("[OK] Broadcast update complete")
131127

132128
# Verify all records have the same websiteurl
133-
verified = next(client.get_dataframe(table, select=select_cols, filter=test_filter))
129+
verified = client.get_dataframe(table, select=select_cols, filter=test_filter)
134130
print(f" Verified:\n{verified.to_string(index=False)}")
135131

136132
# Default: NaN/None fields are skipped (not overridden on server)
@@ -141,14 +137,14 @@ def main():
141137
]
142138
)
143139
client.update_dataframe(table, sparse_df, id_column="accountid")
144-
verified = next(client.get_dataframe(table, select=select_cols, filter=test_filter))
140+
verified = client.get_dataframe(table, select=select_cols, filter=test_filter)
145141
print(f" Verified (Contoso telephone1 updated, websiteurl unchanged):\n{verified.to_string(index=False)}")
146142

147143
# Opt-in: clear_nulls=True sends None as null to clear the field
148144
print("\n Clearing websiteurl for Contoso with clear_nulls=True...")
149145
clear_df = pd.DataFrame([{"accountid": new_accounts["accountid"].iloc[0], "websiteurl": None}])
150146
client.update_dataframe(table, clear_df, id_column="accountid", clear_nulls=True)
151-
verified = next(client.get_dataframe(table, select=select_cols, filter=test_filter))
147+
verified = client.get_dataframe(table, select=select_cols, filter=test_filter)
152148
print(f" Verified (Contoso websiteurl should be empty):\n{verified.to_string(index=False)}")
153149

154150
# ── 7. Delete records by passing a Series of GUIDs ────────────
@@ -161,9 +157,8 @@ def main():
161157
print(f"[OK] Deleted {len(new_accounts)} records")
162158

163159
# Verify deletions - filter for our tagged records should return 0
164-
remaining = list(client.get_dataframe(table, select=select_cols, filter=test_filter))
165-
count = sum(len(page) for page in remaining)
166-
print(f" Verified: {count} test records remaining (expected 0)")
160+
remaining = client.get_dataframe(table, select=select_cols, filter=test_filter)
161+
print(f" Verified: {len(remaining)} test records remaining (expected 0)")
167162

168163
print("\n" + "=" * 60)
169164
print("[OK] DataFrame operations walkthrough complete!")

src/PowerPlatform/Dataverse/claude_skill/dataverse-sdk-use/SKILL.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,12 @@ The SDK provides DataFrame wrappers for all CRUD operations using pandas DataFrames
124124
```python
125125
import pandas as pd
126126

127-
# Query records as paged DataFrames (one DataFrame per page)
128-
for df_page in client.get_dataframe("account", filter="statecode eq 0", select=["name"]):
129-
print(f"Page has {len(df_page)} rows")
127+
# Query records — returns a single DataFrame (like pd.read_sql)
128+
df = client.get_dataframe("account", filter="statecode eq 0", select=["name"])
129+
print(f"Got {len(df)} rows")
130130

131-
# Collect all pages into one DataFrame
132-
df = pd.concat(client.get_dataframe("account", select=["name"], top=100), ignore_index=True)
131+
# Limit results with top for large tables
132+
df = client.get_dataframe("account", select=["name"], top=100)
133133

134134
# Fetch single record as one-row DataFrame
135135
df = client.get_dataframe("account", record_id=account_id, select=["name"])

src/PowerPlatform/Dataverse/client.py

Lines changed: 54 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -374,13 +374,13 @@ def get_dataframe(
374374
top: Optional[int] = None,
375375
expand: Optional[List[str]] = None,
376376
page_size: Optional[int] = None,
377-
) -> Union[pd.DataFrame, Iterable[pd.DataFrame]]:
377+
) -> pd.DataFrame:
378378
"""
379-
Fetch records and return as pandas DataFrames.
379+
Fetch records and return as a single pandas DataFrame.
380380
381381
When ``record_id`` is provided, returns a single-row DataFrame.
382-
When ``record_id`` is None, returns a generator yielding one DataFrame per page,
383-
matching the paging behavior of :meth:`get`.
382+
When ``record_id`` is None, internally iterates all pages and returns one
383+
consolidated DataFrame, similar to ``pd.read_sql()``.
384384
385385
:param table_schema_name: Schema name of the table (e.g. ``"account"`` or ``"new_MyTestTable"``).
386386
:type table_schema_name: :class:`str`
@@ -399,24 +399,27 @@ def get_dataframe(
399399
:param page_size: Optional number of records per page for pagination.
400400
:type page_size: :class:`int` or None
401401
402-
:return: Single-row DataFrame if ``record_id`` is provided, otherwise a generator
403-
yielding one DataFrame per page of results.
404-
:rtype: ~pandas.DataFrame or :class:`collections.abc.Iterable` of ~pandas.DataFrame
402+
:return: DataFrame containing all matching records. Returns an empty DataFrame
403+
when no records match.
404+
:rtype: ~pandas.DataFrame
405+
406+
.. tip::
407+
For large tables, use ``top`` or ``filter`` to limit the result set.
405408
406409
Example:
407410
Fetch a single record as a DataFrame::
408411
409412
df = client.get_dataframe("account", record_id=account_id, select=["name", "telephone1"])
410413
print(df)
411414
412-
Iterate over paged results::
415+
Query with filtering::
413416
414-
for df_page in client.get_dataframe("account", filter="statecode eq 0", top=100):
415-
print(f"Page has {len(df_page)} rows")
417+
df = client.get_dataframe("account", filter="statecode eq 0", select=["name"])
418+
print(f"Got {len(df)} active accounts")
416419
417-
Collect all pages into one DataFrame::
420+
Limit result size::
418421
419-
all_data = pd.concat(client.get_dataframe("account", select=["name"]), ignore_index=True)
422+
df = client.get_dataframe("account", select=["name"], top=100)
420423
"""
421424
if record_id is not None:
422425
result = self.get(
@@ -426,19 +429,21 @@ def get_dataframe(
426429
)
427430
return pd.DataFrame([strip_odata_keys(result)])
428431

429-
def _paged_df() -> Iterable[pd.DataFrame]:
430-
for batch in self.get(
431-
table_schema_name,
432-
select=select,
433-
filter=filter,
434-
orderby=orderby,
435-
top=top,
436-
expand=expand,
437-
page_size=page_size,
438-
):
439-
yield pd.DataFrame([strip_odata_keys(row) for row in batch])
440-
441-
return _paged_df()
432+
frames: List[pd.DataFrame] = []
433+
for batch in self.get(
434+
table_schema_name,
435+
select=select,
436+
filter=filter,
437+
orderby=orderby,
438+
top=top,
439+
expand=expand,
440+
page_size=page_size,
441+
):
442+
frames.append(pd.DataFrame([strip_odata_keys(row) for row in batch]))
443+
444+
if not frames:
445+
return pd.DataFrame()
446+
return pd.concat(frames, ignore_index=True)
442447

443448
def create_dataframe(
444449
self,
@@ -457,6 +462,8 @@ def create_dataframe(
457462
:rtype: ~pandas.Series
458463
459464
:raises TypeError: If ``records`` is not a pandas DataFrame.
465+
:raises ValueError: If ``records`` is empty or the number of returned
466+
IDs does not match the number of input rows.
460467
461468
Example:
462469
Create records from a DataFrame::
@@ -472,27 +479,36 @@ def create_dataframe(
472479
if not isinstance(records, pd.DataFrame):
473480
raise TypeError("records must be a pandas DataFrame")
474481

482+
if records.empty:
483+
raise ValueError("records must be a non-empty DataFrame")
484+
475485
record_list = dataframe_to_records(records)
476486
ids = self.create(table_schema_name, record_list)
487+
488+
if len(ids) != len(records):
489+
raise ValueError(
490+
f"Server returned {len(ids)} IDs for {len(records)} input rows"
491+
)
492+
477493
return pd.Series(ids, index=records.index)
478494

479495
def update_dataframe(
480496
self,
481497
table_schema_name: str,
482-
records: pd.DataFrame,
498+
changes: pd.DataFrame,
483499
id_column: str,
484500
clear_nulls: bool = False,
485501
) -> None:
486502
"""
487503
Update records from a pandas DataFrame.
488504
489505
Each row in the DataFrame represents an update. The ``id_column`` specifies which
490-
column contains the record GUIDs.
506+
column contains the record GUIDs; the remaining columns are the fields to update.
491507
492508
:param table_schema_name: Schema name of the table (e.g. ``"account"`` or ``"new_MyTestTable"``).
493509
:type table_schema_name: :class:`str`
494-
:param records: DataFrame where each row contains record GUID and fields to update.
495-
:type records: ~pd.DataFrame
510+
:param changes: DataFrame where each row contains a record GUID and the fields to update.
511+
:type changes: ~pandas.DataFrame
496512
:param id_column: Name of the DataFrame column containing record GUIDs.
497513
:type id_column: :class:`str`
498514
:param clear_nulls: When ``False`` (default), missing values (NaN/None) are skipped
@@ -501,7 +517,7 @@ def update_dataframe(
501517
want NaN/None values to clear fields.
502518
:type clear_nulls: :class:`bool`
503519
504-
:raises TypeError: If ``records`` is not a pandas DataFrame.
520+
:raises TypeError: If ``changes`` is not a pandas DataFrame.
505521
:raises ValueError: If ``id_column`` is not found in the DataFrame.
506522
507523
Example:
@@ -526,19 +542,19 @@ def update_dataframe(
526542
df = pd.DataFrame([{"accountid": "guid-1", "websiteurl": None}])
527543
client.update_dataframe("account", df, id_column="accountid", clear_nulls=True)
528544
"""
529-
if not isinstance(records, pd.DataFrame):
530-
raise TypeError("records must be a pandas DataFrame")
531-
if id_column not in records.columns:
545+
if not isinstance(changes, pd.DataFrame):
546+
raise TypeError("changes must be a pandas DataFrame")
547+
if id_column not in changes.columns:
532548
raise ValueError(f"id_column '{id_column}' not found in DataFrame columns")
533549

534-
ids = records[id_column].tolist()
535-
change_columns = [column for column in records.columns if column != id_column]
536-
changes = dataframe_to_records(records[change_columns], na_as_null=clear_nulls)
550+
ids = changes[id_column].tolist()
551+
change_columns = [column for column in changes.columns if column != id_column]
552+
change_list = dataframe_to_records(changes[change_columns], na_as_null=clear_nulls)
537553

538554
if len(ids) == 1:
539-
self.update(table_schema_name, ids[0], changes[0])
555+
self.update(table_schema_name, ids[0], change_list[0])
540556
else:
541-
self.update(table_schema_name, ids, changes)
557+
self.update(table_schema_name, ids, change_list)
542558

543559
def delete_dataframe(
544560
self,

0 commit comments

Comments
 (0)