
Commit 61a9ee1

Abel Milash and claude committed
Auto-chunk *Multiple operations at 1,000 records (issue #156)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 78cd852 commit 61a9ee1

6 files changed

Lines changed: 730 additions & 40 deletions


.claude/skills/dataverse-sdk-use/SKILL.md

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ Use the PowerPlatform Dataverse Client Python SDK to interact with Microsoft Dat
 - `client.batch` -- batch multiple operations into a single HTTP request
 
 ### Bulk Operations
-The SDK supports Dataverse's native bulk operations: Pass lists to `create()`, `update()` for automatic bulk processing, for `delete()`, set `use_bulk_delete` when passing lists to use bulk operation
+The SDK supports Dataverse's native bulk operations: Pass lists to `create()`, `update()`, or `upsert()` for automatic bulk processing; for `delete()`, set `use_bulk_delete=True`. Lists exceeding 1,000 records are automatically split into sequential 1,000-record chunks — no manual pre-splitting needed. Operations across chunks are **not atomic**: a failure mid-way may leave earlier chunks applied.
 
 ### Paging
 - Control page size with `page_size` parameter
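
A minimal usage sketch of the committed behavior (editor's illustration, not part of this commit), assuming a connected `client` per the README and a hypothetical list of 2,500 `account` payloads:

```python
# Hedged sketch: bulk operations with auto-chunking. The 2,500 payloads below
# are illustrative; the calls match the README examples in this commit.
records = [{"name": f"Account {i}"} for i in range(2500)]

# One call; the SDK splits this into three sequential chunks (1000/1000/500).
ids = client.records.create("account", records)

# The same auto-chunking applies to bulk update and bulk delete.
client.records.update("account", ids, {"industry": "Technology"})
client.records.delete("account", ids, use_bulk_delete=True)
```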

README.md

Lines changed: 4 additions & 0 deletions
@@ -186,6 +186,10 @@ client.records.update("account", ids, {"industry": "Technology"})
 client.records.delete("account", ids, use_bulk_delete=True)
 ```
 
+> **Large batches**: Lists exceeding 1,000 records are automatically split into sequential
+> 1,000-record chunks — no manual pre-splitting needed. Note that chunked operations are
+> **not atomic**: a failure mid-way may leave earlier chunks applied.
+
 ### Upsert operations
 
 Use `client.records.upsert()` to create or update records identified by alternate keys. When the
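
Because chunks commit independently, a caller should reconcile before retrying a failed large batch. A hedged sketch (editor's illustration, not from the commit), reusing `client` from the README example above:

```python
# Hedged sketch: a failure mid-way through an auto-chunked create can leave
# earlier 1,000-record chunks committed even though the call raised.
records = [{"name": f"Account {i}"} for i in range(1500)]
try:
    ids = client.records.create("account", records)
except Exception:
    # The first chunk may already be committed; query for the partially
    # created rows and reconcile rather than retrying the full list blindly.
    raise
```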

examples/advanced/walkthrough.py

Lines changed: 32 additions & 0 deletions
@@ -257,6 +257,37 @@ def _run_walkthrough(client):
         record_ids = [r.get("new_walkthroughdemoid")[:8] + "..." for r in page]
         print(f" Page {page_num}: {len(page)} records - IDs: {record_ids}")
 
+    # ============================================================================
+    # 6b. LARGE BATCH (AUTO-CHUNKING)
+    # The SDK automatically splits lists > 1,000 records into sequential chunks,
+    # each dispatched as a separate CreateMultiple / UpdateMultiple / UpsertMultiple
+    # request. No manual pre-splitting needed.
+    # Note: chunked operations are NOT atomic — a failure mid-way leaves earlier
+    # chunks applied.
+    # ============================================================================
+    print("\n" + "=" * 80)
+    print("6b. Large Batch (Auto-Chunking)")
+    print("=" * 80)
+
+    LARGE_BATCH_SIZE = 1200  # spans 2 chunks: first 1000 + remaining 200
+    log_call(f"client.records.create('{table_name}', [{LARGE_BATCH_SIZE} records])  # auto-chunked")
+    large_batch_records = [
+        {
+            "new_Title": f"Batch item {i}",
+            "new_Quantity": i % 100,
+            "new_Amount": float(i),
+            "new_Completed": False,
+            "new_Priority": Priority.LOW,
+        }
+        for i in range(LARGE_BATCH_SIZE)
+    ]
+    large_batch_ids = backoff(lambda: client.records.create(table_name, large_batch_records))
+    print(f"[OK] Created {len(large_batch_ids)} records across 2 auto-chunks (1000 + 200)")
+
+    log_call(f"client.records.update('{table_name}', [{LARGE_BATCH_SIZE} IDs], {{...}})  # auto-chunked")
+    backoff(lambda: client.records.update(table_name, large_batch_ids, {"new_Completed": True}))
+    print(f"[OK] Updated {len(large_batch_ids)} records across 2 auto-chunks")
+
     # ============================================================================
     # 7. QUERYBUILDER - FLUENT QUERIES
     # ============================================================================
@@ -602,6 +633,7 @@ def _run_walkthrough(client):
     print(" [OK] Reading records by ID and with filters")
     print(" [OK] Single and multiple record updates")
     print(" [OK] Paging through large result sets")
+    print(" [OK] Large batch auto-chunking (1,200 records split into 2 chunks)")
     print(" [OK] QueryBuilder fluent queries (filter_eq, filter_in, filter_between, where, to_dataframe)")
     print(" [OK] Expand navigation properties (simple + nested ExpandOption)")
     print(" [OK] SQL queries")

src/PowerPlatform/Dataverse/data/_odata.py

Lines changed: 74 additions & 33 deletions
@@ -52,6 +52,7 @@
 _GUID_RE = re.compile(r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}")
 _CALL_SCOPE_CORRELATION_ID: ContextVar[Optional[str]] = ContextVar("_CALL_SCOPE_CORRELATION_ID", default=None)
 _DEFAULT_EXPECTED_STATUSES: tuple[int, ...] = (200, 201, 202, 204)
+_MULTIPLE_BATCH_SIZE = 1000
 
 
 @dataclass
@@ -331,9 +332,17 @@ def _create(self, entity_set: str, table_schema_name: str, record: Dict[str, Any
                 f"Create response missing GUID in OData-EntityId/Location headers (status={getattr(r,'status_code', '?')}). Headers: {header_keys}"
             )
 
-    def _create_multiple(self, entity_set: str, table_schema_name: str, records: List[Dict[str, Any]]) -> List[str]:
+    def _create_multiple(
+        self,
+        entity_set: str,
+        table_schema_name: str,
+        records: List[Dict[str, Any]],
+    ) -> List[str]:
         """Create multiple records using the collection-bound ``CreateMultiple`` action.
 
+        Large record lists are automatically split into chunks of up to
+        ``_MULTIPLE_BATCH_SIZE`` records and dispatched sequentially.
+
         :param entity_set: Resolved entity set (plural) name.
         :type entity_set: ``str``
         :param table_schema_name: Schema name of the table.
@@ -345,35 +354,42 @@ def _create_multiple(self, entity_set: str, table_schema_name: str, records: Lis
         :rtype: ``list[str]``
 
         .. note::
-            Logical type stamping: if any payload omits ``@odata.type`` the client injects ``Microsoft.Dynamics.CRM.<table_logical_name>``. If all payloads already include ``@odata.type`` no modification occurs.
+            Logical type stamping: if any payload omits ``@odata.type`` the client
+            injects ``Microsoft.Dynamics.CRM.<table_logical_name>``. If all payloads
+            already include ``@odata.type`` no modification occurs.
+
+        .. warning::
+            When input exceeds ``_MULTIPLE_BATCH_SIZE`` records, the operation is
+            split into multiple requests and is **not atomic**. If a later batch
+            fails, earlier batches are already committed. Callers that require
+            atomicity should limit input to ``<= _MULTIPLE_BATCH_SIZE`` records.
         """
         if not all(isinstance(r, dict) for r in records):
             raise TypeError("All items for multi-create must be dicts")
-        r = self._execute_raw(self._build_create_multiple(entity_set, table_schema_name, records))
-        try:
-            body = r.json() if r.text else {}
-        except ValueError:
-            body = {}
-        if not isinstance(body, dict):
-            return []
-        # Expected: { "Ids": [guid, ...] }
-        ids = body.get("Ids")
-        if isinstance(ids, list):
-            return [i for i in ids if isinstance(i, str)]
-
-        value = body.get("value")
-        if isinstance(value, list):
-            # Extract IDs if possible
-            out: List[str] = []
-            for item in value:
-                if isinstance(item, dict):
-                    # Heuristic: look for a property ending with 'id'
-                    for k, v in item.items():
-                        if isinstance(k, str) and k.lower().endswith("id") and isinstance(v, str) and len(v) >= 32:
-                            out.append(v)
-                            break
-            return out
-        return []
+
+        all_ids: List[str] = []
+        for i in range(0, len(records), _MULTIPLE_BATCH_SIZE):
+            chunk = records[i : i + _MULTIPLE_BATCH_SIZE]
+            r = self._execute_raw(self._build_create_multiple(entity_set, table_schema_name, chunk))
+            try:
+                body = r.json() if r.text else {}
+            except ValueError:
+                body = {}
+            if not isinstance(body, dict):
+                continue
+            ids = body.get("Ids")
+            if isinstance(ids, list):
+                all_ids.extend(i for i in ids if isinstance(i, str))
+                continue
+            value = body.get("value")
+            if isinstance(value, list):
+                for item in value:
+                    if isinstance(item, dict):
+                        for k, v in item.items():
+                            if isinstance(k, str) and k.lower().endswith("id") and isinstance(v, str) and len(v) >= 32:
+                                all_ids.append(v)
+                                break
+        return all_ids
 
     def _build_alternate_key_str(self, alternate_key: Dict[str, Any]) -> str:
         """Build an OData alternate key segment from a mapping of key names to values.
@@ -467,6 +483,10 @@ def _upsert_multiple(
 
         :raises ValueError: If ``alternate_keys`` and ``records`` differ in length, or if
             any record payload contains an alternate key field with a conflicting value.
+
+        .. warning::
+            When input exceeds ``_MULTIPLE_BATCH_SIZE`` records, the operation is
+            split into multiple requests and is **not atomic** across batches.
         """
         if len(alternate_keys) != len(records):
             raise ValueError(
@@ -488,9 +508,12 @@
             key_str = self._build_alternate_key_str(alt_key)
             record_processed["@odata.id"] = f"{entity_set}({key_str})"
             targets.append(record_processed)
-        payload = {"Targets": targets}
+
         url = f"{self.api}/{entity_set}/Microsoft.Dynamics.CRM.UpsertMultiple"
-        self._request("post", url, json=payload, expected=(200, 201, 204))
+        for i in range(0, len(targets), _MULTIPLE_BATCH_SIZE):
+            chunk = targets[i : i + _MULTIPLE_BATCH_SIZE]
+            self._request("post", url, json={"Targets": chunk}, expected=(200, 201, 204))
+        return None
 
     # --- Derived helpers for high-level client ergonomics ---
     def _primary_id_attr(self, table_schema_name: str) -> str:
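
How the `Targets` list maps onto sequential request bodies, as a standalone sketch (editor's illustration, with 2,500 hypothetical upsert targets):

```python
# Sketch: 2,500 upsert targets sliced at the 1,000-record batch size become
# three sequential POST bodies of 1000, 1000, and 500 targets respectively.
_MULTIPLE_BATCH_SIZE = 1000
targets = [{"@odata.id": f"accounts(key='{i}')", "name": f"Account {i}"} for i in range(2500)]
payloads = [
    {"Targets": targets[i : i + _MULTIPLE_BATCH_SIZE]}
    for i in range(0, len(targets), _MULTIPLE_BATCH_SIZE)
]
print([len(p["Targets"]) for p in payloads])  # [1000, 1000, 500]
```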
@@ -509,7 +532,10 @@ def _primary_id_attr(self, table_schema_name: str) -> str:
         )
 
     def _update_by_ids(
-        self, table_schema_name: str, ids: List[str], changes: Union[Dict[str, Any], List[Dict[str, Any]]]
+        self,
+        table_schema_name: str,
+        ids: List[str],
+        changes: Union[Dict[str, Any], List[Dict[str, Any]]],
     ) -> None:
         """Update many records by GUID list using the collection-bound ``UpdateMultiple`` action.
 
@@ -607,9 +633,17 @@ def _update(self, table_schema_name: str, key: str, data: Dict[str, Any]) -> Non
         """
         self._execute_raw(self._build_update(table_schema_name, key, data))
 
-    def _update_multiple(self, entity_set: str, table_schema_name: str, records: List[Dict[str, Any]]) -> None:
+    def _update_multiple(
+        self,
+        entity_set: str,
+        table_schema_name: str,
+        records: List[Dict[str, Any]],
+    ) -> None:
         """Bulk update existing records via the collection-bound ``UpdateMultiple`` action.
 
+        Large record lists are automatically split into chunks of up to
+        ``_MULTIPLE_BATCH_SIZE`` records and dispatched sequentially.
+
         :param entity_set: Resolved entity set (plural) name.
         :type entity_set: ``str``
         :param table_schema_name: Schema name of the table, e.g. "new_MyTestTable".
@@ -621,13 +655,20 @@ def _update_multiple(self, entity_set: str, table_schema_name: str, records: Lis
 
         .. note::
             - Endpoint: ``POST /{entity_set}/Microsoft.Dynamics.CRM.UpdateMultiple`` with body ``{"Targets": [...]}``.
-            - Transactional semantics: if any individual update fails, the entire request rolls back.
+            - Transactional semantics apply within each batch; if a batch fails it rolls back, but earlier batches are already committed.
            - Response content is ignored; no stable contract for returned IDs/representations.
            - Caller must supply the correct primary key attribute (e.g. ``accountid``) in every record.
+
+        .. warning::
+            When input exceeds ``_MULTIPLE_BATCH_SIZE`` records, the operation is
+            split into multiple requests and is **not atomic** across batches.
         """
         if not isinstance(records, list) or not records or not all(isinstance(r, dict) for r in records):
             raise TypeError("records must be a non-empty list[dict]")
-        self._execute_raw(self._build_update_multiple_from_records(entity_set, table_schema_name, records))
+
+        for i in range(0, len(records), _MULTIPLE_BATCH_SIZE):
+            chunk = records[i : i + _MULTIPLE_BATCH_SIZE]
+            self._execute_raw(self._build_update_multiple_from_records(entity_set, table_schema_name, chunk))
         return None
 
     def _delete(self, table_schema_name: str, key: str) -> None:
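
Given the per-batch semantics documented above, a caller that needs checkpoint control can chunk manually at the same boundary so each request stays transactional. A hedged sketch (editor's illustration, reusing `client` and `ids` from the README examples):

```python
# Hedged sketch: manual chunking at the 1,000-record boundary restores the
# checkpoint control that a single auto-chunked call gives up. Each batch at
# or below the batch size is one transactional UpdateMultiple request.
CHUNK = 1000
offset = 0
while offset < len(ids):
    batch = ids[offset : offset + CHUNK]
    client.records.update("account", batch, {"industry": "Technology"})
    offset += CHUNK  # records before `offset` are durably committed
```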

src/PowerPlatform/Dataverse/operations/dataframe.py

Lines changed: 10 additions & 6 deletions
@@ -178,9 +178,11 @@ def create(
         IDs does not match the number of input rows.
 
     .. tip::
-        All rows are sent in a single ``CreateMultiple`` request. For very
-        large DataFrames, consider splitting into smaller batches to avoid
-        request timeouts.
+        The SDK automatically splits large DataFrames into sequential
+        1,000-row chunks before sending to ``CreateMultiple``. You do not
+        need to pre-split large DataFrames. Note that chunked operations
+        are **not atomic** — a failure mid-way may leave earlier chunks
+        applied.
 
     Example:
         Create records from a DataFrame::
@@ -253,9 +255,11 @@ def update(
         rows are never skipped.
 
     .. tip::
-        All rows are sent in a single ``UpdateMultiple`` request (or a
-        single PATCH for one row). For very large DataFrames, consider
-        splitting into smaller batches to avoid request timeouts.
+        The SDK automatically splits large DataFrames into sequential
+        1,000-row chunks before sending to ``UpdateMultiple`` (or a single
+        PATCH for one row). You do not need to pre-split large DataFrames.
+        Note that chunked operations are **not atomic** — a failure
+        mid-way may leave earlier chunks applied.
 
     Example:
         Update records with different values per row::
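
A hedged sketch of the DataFrame path (editor's illustration; the DataFrame-native entry point in operations/dataframe.py is not shown in this diff, so this routes through the known `client.records.create` instead):

```python
import pandas as pd

# Hedged sketch: a 2,500-row DataFrame converted to record dicts and sent
# through the bulk create path, which auto-chunks it into 1000/1000/500.
# `client` is assumed connected; column names are illustrative.
df = pd.DataFrame({
    "name": [f"Account {i}" for i in range(2500)],
    "industry": ["Technology"] * 2500,
})
records = df.to_dict(orient="records")
ids = client.records.create("account", records)
assert len(ids) == len(df)
```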
