Skip to content

Commit 63ec8a8

Browse files
author
Abel Milash
committed
Address PR #98 review: single DF return, rename param, add create guards
1 parent 7a6fb2a commit 63ec8a8

5 files changed

Lines changed: 124 additions & 111 deletions

File tree

.claude/skills/dataverse-sdk-use/SKILL.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,12 @@ The SDK provides DataFrame wrappers for all CRUD operations using pandas DataFrames
124124
```python
125125
import pandas as pd
126126

127-
# Query records as paged DataFrames (one DataFrame per page)
128-
for df_page in client.get_dataframe("account", filter="statecode eq 0", select=["name"]):
129-
print(f"Page has {len(df_page)} rows")
127+
# Query records — returns a single DataFrame (like pd.read_sql)
128+
df = client.get_dataframe("account", filter="statecode eq 0", select=["name"])
129+
print(f"Got {len(df)} rows")
130130

131-
# Collect all pages into one DataFrame
132-
df = pd.concat(client.get_dataframe("account", select=["name"], top=100), ignore_index=True)
131+
# Limit results with top for large tables
132+
df = client.get_dataframe("account", select=["name"], top=100)
133133

134134
# Fetch single record as one-row DataFrame
135135
df = client.get_dataframe("account", record_id=account_id, select=["name"])

examples/advanced/dataframe_operations.py

Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -71,28 +71,24 @@ def main():
7171
print(f"[OK] Created {len(new_accounts)} records")
7272
print(f" IDs: {new_accounts['accountid'].tolist()}")
7373

74-
# ── 2. Query records as paged DataFrames ──────────────────────
74+
# ── 2. Query records as a DataFrame ─────────────────────────
7575
print("\n" + "-" * 60)
76-
print("2. Query records as paged DataFrames (lazy generator)")
76+
print("2. Query records as a DataFrame")
7777
print("-" * 60)
7878

79-
page_count = 0
80-
for df_page in client.get_dataframe(table, select=select_cols, filter=test_filter, page_size=2):
81-
page_count += 1
82-
print(f" Page {page_count} ({len(df_page)} records):\n{df_page.to_string(index=False)}")
79+
df_all = client.get_dataframe(table, select=select_cols, filter=test_filter)
80+
print(f"[OK] Got {len(df_all)} records in one DataFrame")
81+
print(f" Columns: {list(df_all.columns)}")
82+
print(f"{df_all.to_string(index=False)}")
8383

84-
# ── 3. Collect all pages into one DataFrame ───────────────────
84+
# ── 3. Limit results with top ──────────────────────────────
8585
print("\n" + "-" * 60)
86-
print("3. Collect all pages into one DataFrame with pd.concat")
86+
print("3. Limit results with top")
8787
print("-" * 60)
8888

89-
all_records = pd.concat(
90-
client.get_dataframe(table, select=select_cols, filter=test_filter, page_size=2),
91-
ignore_index=True,
92-
)
93-
print(f"[OK] Got {len(all_records)} total records in one DataFrame")
94-
print(f" Columns: {list(all_records.columns)}")
95-
print(f"{all_records.to_string(index=False)}")
89+
df_top2 = client.get_dataframe(table, select=select_cols, filter=test_filter, top=2)
90+
print(f"[OK] Got {len(df_top2)} records with top=2")
91+
print(f"{df_top2.to_string(index=False)}")
9692

9793
# ── 4. Fetch a single record by ID ────────────────────────────
9894
print("\n" + "-" * 60)
@@ -114,8 +110,8 @@ def main():
114110
client.update_dataframe(table, new_accounts[["accountid", "telephone1"]], id_column="accountid")
115111
print("[OK] Updated 3 records")
116112

117-
# Verify the updates with a bulk get
118-
verified = next(client.get_dataframe(table, select=select_cols, filter=test_filter))
113+
# Verify the updates with a query
114+
verified = client.get_dataframe(table, select=select_cols, filter=test_filter)
119115
print(f" Verified:\n{verified.to_string(index=False)}")
120116

121117
# ── 6. Broadcast update (same value to all records) ───────────
@@ -130,7 +126,7 @@ def main():
130126
print("[OK] Broadcast update complete")
131127

132128
# Verify all records have the same websiteurl
133-
verified = next(client.get_dataframe(table, select=select_cols, filter=test_filter))
129+
verified = client.get_dataframe(table, select=select_cols, filter=test_filter)
134130
print(f" Verified:\n{verified.to_string(index=False)}")
135131

136132
# Default: NaN/None fields are skipped (not overridden on server)
@@ -141,14 +137,14 @@ def main():
141137
]
142138
)
143139
client.update_dataframe(table, sparse_df, id_column="accountid")
144-
verified = next(client.get_dataframe(table, select=select_cols, filter=test_filter))
140+
verified = client.get_dataframe(table, select=select_cols, filter=test_filter)
145141
print(f" Verified (Contoso telephone1 updated, websiteurl unchanged):\n{verified.to_string(index=False)}")
146142

147143
# Opt-in: clear_nulls=True sends None as null to clear the field
148144
print("\n Clearing websiteurl for Contoso with clear_nulls=True...")
149145
clear_df = pd.DataFrame([{"accountid": new_accounts["accountid"].iloc[0], "websiteurl": None}])
150146
client.update_dataframe(table, clear_df, id_column="accountid", clear_nulls=True)
151-
verified = next(client.get_dataframe(table, select=select_cols, filter=test_filter))
147+
verified = client.get_dataframe(table, select=select_cols, filter=test_filter)
152148
print(f" Verified (Contoso websiteurl should be empty):\n{verified.to_string(index=False)}")
153149

154150
# ── 7. Delete records by passing a Series of GUIDs ────────────
@@ -161,9 +157,8 @@ def main():
161157
print(f"[OK] Deleted {len(new_accounts)} records")
162158

163159
# Verify deletions - filter for our tagged records should return 0
164-
remaining = list(client.get_dataframe(table, select=select_cols, filter=test_filter))
165-
count = sum(len(page) for page in remaining)
166-
print(f" Verified: {count} test records remaining (expected 0)")
160+
remaining = client.get_dataframe(table, select=select_cols, filter=test_filter)
161+
print(f" Verified: {len(remaining)} test records remaining (expected 0)")
167162

168163
print("\n" + "=" * 60)
169164
print("[OK] DataFrame operations walkthrough complete!")

src/PowerPlatform/Dataverse/claude_skill/dataverse-sdk-use/SKILL.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,12 @@ The SDK provides DataFrame wrappers for all CRUD operations using pandas DataFrames
124124
```python
125125
import pandas as pd
126126

127-
# Query records as paged DataFrames (one DataFrame per page)
128-
for df_page in client.get_dataframe("account", filter="statecode eq 0", select=["name"]):
129-
print(f"Page has {len(df_page)} rows")
127+
# Query records — returns a single DataFrame (like pd.read_sql)
128+
df = client.get_dataframe("account", filter="statecode eq 0", select=["name"])
129+
print(f"Got {len(df)} rows")
130130

131-
# Collect all pages into one DataFrame
132-
df = pd.concat(client.get_dataframe("account", select=["name"], top=100), ignore_index=True)
131+
# Limit results with top for large tables
132+
df = client.get_dataframe("account", select=["name"], top=100)
133133

134134
# Fetch single record as one-row DataFrame
135135
df = client.get_dataframe("account", record_id=account_id, select=["name"])

src/PowerPlatform/Dataverse/client.py

Lines changed: 54 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -374,13 +374,13 @@ def get_dataframe(
374374
top: Optional[int] = None,
375375
expand: Optional[List[str]] = None,
376376
page_size: Optional[int] = None,
377-
) -> Union[pd.DataFrame, Iterable[pd.DataFrame]]:
377+
) -> pd.DataFrame:
378378
"""
379-
Fetch records and return as pandas DataFrames.
379+
Fetch records and return as a single pandas DataFrame.
380380
381381
When ``record_id`` is provided, returns a single-row DataFrame.
382-
When ``record_id`` is None, returns a generator yielding one DataFrame per page,
383-
matching the paging behavior of :meth:`get`.
382+
When ``record_id`` is None, internally iterates all pages and returns one
383+
consolidated DataFrame, similar to ``pd.read_sql()``.
384384
385385
:param table_schema_name: Schema name of the table (e.g. ``"account"`` or ``"new_MyTestTable"``).
386386
:type table_schema_name: :class:`str`
@@ -399,24 +399,27 @@ def get_dataframe(
399399
:param page_size: Optional number of records per page for pagination.
400400
:type page_size: :class:`int` or None
401401
402-
:return: Single-row DataFrame if ``record_id`` is provided, otherwise a generator
403-
yielding one DataFrame per page of results.
404-
:rtype: ~pandas.DataFrame or :class:`collections.abc.Iterable` of ~pandas.DataFrame
402+
:return: DataFrame containing all matching records. Returns an empty DataFrame
403+
when no records match.
404+
:rtype: ~pandas.DataFrame
405+
406+
.. tip::
407+
For large tables, use ``top`` or ``filter`` to limit the result set.
405408
406409
Example:
407410
Fetch a single record as a DataFrame::
408411
409412
df = client.get_dataframe("account", record_id=account_id, select=["name", "telephone1"])
410413
print(df)
411414
412-
Iterate over paged results::
415+
Query with filtering::
413416
414-
for df_page in client.get_dataframe("account", filter="statecode eq 0", top=100):
415-
print(f"Page has {len(df_page)} rows")
417+
df = client.get_dataframe("account", filter="statecode eq 0", select=["name"])
418+
print(f"Got {len(df)} active accounts")
416419
417-
Collect all pages into one DataFrame::
420+
Limit result size::
418421
419-
all_data = pd.concat(client.get_dataframe("account", select=["name"]), ignore_index=True)
422+
df = client.get_dataframe("account", select=["name"], top=100)
420423
"""
421424
if record_id is not None:
422425
result = self.get(
@@ -426,19 +429,21 @@ def get_dataframe(
426429
)
427430
return pd.DataFrame([strip_odata_keys(result)])
428431

429-
def _paged_df() -> Iterable[pd.DataFrame]:
430-
for batch in self.get(
431-
table_schema_name,
432-
select=select,
433-
filter=filter,
434-
orderby=orderby,
435-
top=top,
436-
expand=expand,
437-
page_size=page_size,
438-
):
439-
yield pd.DataFrame([strip_odata_keys(row) for row in batch])
440-
441-
return _paged_df()
432+
frames: List[pd.DataFrame] = []
433+
for batch in self.get(
434+
table_schema_name,
435+
select=select,
436+
filter=filter,
437+
orderby=orderby,
438+
top=top,
439+
expand=expand,
440+
page_size=page_size,
441+
):
442+
frames.append(pd.DataFrame([strip_odata_keys(row) for row in batch]))
443+
444+
if not frames:
445+
return pd.DataFrame()
446+
return pd.concat(frames, ignore_index=True)
442447

443448
def create_dataframe(
444449
self,
@@ -457,6 +462,8 @@ def create_dataframe(
457462
:rtype: ~pandas.Series
458463
459464
:raises TypeError: If ``records`` is not a pandas DataFrame.
465+
:raises ValueError: If ``records`` is empty or the number of returned
466+
IDs does not match the number of input rows.
460467
461468
Example:
462469
Create records from a DataFrame::
@@ -472,27 +479,36 @@ def create_dataframe(
472479
if not isinstance(records, pd.DataFrame):
473480
raise TypeError("records must be a pandas DataFrame")
474481

482+
if records.empty:
483+
raise ValueError("records must be a non-empty DataFrame")
484+
475485
record_list = dataframe_to_records(records)
476486
ids = self.create(table_schema_name, record_list)
487+
488+
if len(ids) != len(records):
489+
raise ValueError(
490+
f"Server returned {len(ids)} IDs for {len(records)} input rows"
491+
)
492+
477493
return pd.Series(ids, index=records.index)
478494

479495
def update_dataframe(
480496
self,
481497
table_schema_name: str,
482-
records: pd.DataFrame,
498+
changes: pd.DataFrame,
483499
id_column: str,
484500
clear_nulls: bool = False,
485501
) -> None:
486502
"""
487503
Update records from a pandas DataFrame.
488504
489505
Each row in the DataFrame represents an update. The ``id_column`` specifies which
490-
column contains the record GUIDs.
506+
column contains the record GUIDs; the remaining columns are the fields to update.
491507
492508
:param table_schema_name: Schema name of the table (e.g. ``"account"`` or ``"new_MyTestTable"``).
493509
:type table_schema_name: :class:`str`
494-
:param records: DataFrame where each row contains record GUID and fields to update.
495-
:type records: ~pd.DataFrame
510+
:param changes: DataFrame where each row contains a record GUID and the fields to update.
511+
:type changes: ~pandas.DataFrame
496512
:param id_column: Name of the DataFrame column containing record GUIDs.
497513
:type id_column: :class:`str`
498514
:param clear_nulls: When ``False`` (default), missing values (NaN/None) are skipped
@@ -501,7 +517,7 @@ def update_dataframe(
501517
want NaN/None values to clear fields.
502518
:type clear_nulls: :class:`bool`
503519
504-
:raises TypeError: If ``records`` is not a pandas DataFrame.
520+
:raises TypeError: If ``changes`` is not a pandas DataFrame.
505521
:raises ValueError: If ``id_column`` is not found in the DataFrame.
506522
507523
Example:
@@ -526,19 +542,19 @@ def update_dataframe(
526542
df = pd.DataFrame([{"accountid": "guid-1", "websiteurl": None}])
527543
client.update_dataframe("account", df, id_column="accountid", clear_nulls=True)
528544
"""
529-
if not isinstance(records, pd.DataFrame):
530-
raise TypeError("records must be a pandas DataFrame")
531-
if id_column not in records.columns:
545+
if not isinstance(changes, pd.DataFrame):
546+
raise TypeError("changes must be a pandas DataFrame")
547+
if id_column not in changes.columns:
532548
raise ValueError(f"id_column '{id_column}' not found in DataFrame columns")
533549

534-
ids = records[id_column].tolist()
535-
change_columns = [column for column in records.columns if column != id_column]
536-
changes = dataframe_to_records(records[change_columns], na_as_null=clear_nulls)
550+
ids = changes[id_column].tolist()
551+
change_columns = [column for column in changes.columns if column != id_column]
552+
change_list = dataframe_to_records(changes[change_columns], na_as_null=clear_nulls)
537553

538554
if len(ids) == 1:
539-
self.update(table_schema_name, ids[0], changes[0])
555+
self.update(table_schema_name, ids[0], change_list[0])
540556
else:
541-
self.update(table_schema_name, ids, changes)
557+
self.update(table_schema_name, ids, change_list)
542558

543559
def delete_dataframe(
544560
self,

0 commit comments

Comments
 (0)