Skip to content

Commit 3eab669

Browse files
Merge pull request #34 from microsoft/users/zhaodongwang/bulkDelete
bulk delete
2 parents bd5364d + ebd5c00 commit 3eab669

File tree

4 files changed

+158
-56
lines changed

4 files changed

+158
-56
lines changed

README.md

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ Auth:
3939
| `update` | `update(logical_name, list[id], patch)` | `None` | Broadcast; same patch applied to all IDs (UpdateMultiple). |
4040
| `update` | `update(logical_name, list[id], list[patch])` | `None` | 1:1 patches; lengths must match (UpdateMultiple). |
4141
| `delete` | `delete(logical_name, id)` | `None` | Delete one record. |
42-
| `delete` | `delete(logical_name, list[id])` | `None` | Delete many (sequential). |
42+
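| `delete` | `delete(logical_name, list[id], use_bulk_delete=True)` | `Optional[str]` | Delete many. With `use_bulk_delete=True` (default), schedules an async BulkDelete job and returns its job ID; with `use_bulk_delete=False`, deletes the records one at a time and returns `None`. |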
4343
| `query_sql` | `query_sql(sql)` | `list[dict]` | Constrained read-only SELECT via `?sql=`. |
4444
| `create_table` | `create_table(tablename, schema, solution_unique_name=None)` | `dict` | Creates custom table + columns. Friendly name (e.g. `SampleItem`) becomes schema `new_SampleItem`; explicit schema name (contains `_`) used as-is. Pass `solution_unique_name` to attach the table to a specific solution instead of the default solution. |
4545
| `create_column` | `create_column(tablename, columns)` | `list[str]` | Adds columns using a `{name: type}` mapping (same shape as `create_table` schema). Returns schema names for the created columns. |
@@ -54,8 +54,10 @@ Auth:
5454

5555
Guidelines:
5656
- `create` always returns a list of GUIDs (1 for single, N for bulk).
57-
- `update`/`delete` always return `None` (single and multi forms).
57+
- `update` always returns `None`.
5858
- Bulk update chooses broadcast vs per-record by the type of `changes` (dict vs list).
59+
- `delete` returns `None` for single-record delete and sequential multi-record delete, and the BulkDelete async job ID for multi-record BulkDelete.
60+
- BulkDelete doesn't wait for the delete job to complete. It returns once the async delete job is scheduled.
5961
- Paging and SQL operations never mutate inputs.
6062
- Metadata lookups for logical name stamping are cached per entity set (in-memory).
6163

@@ -138,9 +140,12 @@ client.update("account", ids, [
138140
])
139141
print({"multi_update": "ok"})
140142

141-
# Delete
143+
# Delete (single)
142144
client.delete("account", account_id)
143145

146+
# Bulk delete (schedules BulkDelete and returns job id)
147+
job_id = client.delete("account", ids)
148+
144149
# SQL (read-only) via Web API `?sql=`
145150
rows = client.query_sql("SELECT TOP 3 accountid, name FROM account ORDER BY createdon DESC")
146151
for r in rows:
@@ -329,7 +334,8 @@ client.delete_table("SampleItem") # delete table (friendly name or explici
329334

330335
Notes:
331336
- `create` always returns a list of GUIDs (length 1 for single input).
332-
- `update` and `delete` return `None` for both single and multi.
337+
- `update` returns `None`.
338+
- `delete` returns `None` for single-record delete and for sequential multi-record delete (`use_bulk_delete=False`), and the BulkDelete async job ID for multi-record BulkDelete.
333339
- Passing a list of payloads to `create` triggers bulk create and returns `list[str]` of IDs.
334340
- `get` supports single record retrieval with record id or paging through result sets (prefer `select` to limit columns).
335341
- For CRUD methods that take a record id, pass the GUID string (36-char hyphenated). Parentheses around the GUID are accepted but not required.
@@ -345,7 +351,6 @@ VS Code Tasks
345351

346352
## Limitations / Future Work
347353
- No general-purpose OData batching, upsert, or association operations yet.
348-
- `DeleteMultiple` not yet exposed.
349354
- Minimal retry policy in library (network-error only); examples include additional backoff for transient Dataverse consistency.
350355

351356
## Contributing

examples/quickstart.py

Lines changed: 63 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
import requests
1515
import time
1616
from datetime import date, timedelta
17-
from concurrent.futures import ThreadPoolExecutor, as_completed
1817

1918

2019
entered = input("Enter Dataverse org URL (e.g. https://yourorg.crm.dynamics.com): ").strip()
@@ -57,10 +56,12 @@ def backoff_retry(op, *, delays=(0, 2, 5, 10, 20), retry_http_statuses=(400, 403
5756
print(f'Request failed: {ex}')
5857
last_exc = ex
5958
if retry_if and retry_if(ex):
59+
print("Retrying operation...")
6060
continue
6161
if isinstance(ex, requests.exceptions.HTTPError):
6262
code = getattr(getattr(ex, 'response', None), 'status_code', None)
6363
if code in retry_http_statuses:
64+
print("Retrying operation...")
6465
continue
6566
break
6667
if last_exc:
@@ -176,20 +177,6 @@ def print_line_summaries(label: str, summaries: list[dict]) -> None:
176177
f"count={s.get('count')} amount={s.get('amount')} when={s.get('when')}"
177178
)
178179

179-
def _resolve_status_value(kind: str, raw_value, use_french: bool):
180-
"""kind values:
181-
- 'label': English label
182-
- 'fr_label': French label if allowed, else fallback to English equivalent
183-
- 'int': the enum integer value
184-
"""
185-
if kind == "label":
186-
return raw_value
187-
if kind == "fr_label":
188-
if use_french:
189-
return raw_value
190-
return "Active" if raw_value == "Actif" else "Inactive"
191-
return raw_value
192-
193180
def _has_installed_language(base_url: str, credential, lcid: int) -> bool:
194181
try:
195182
token = credential.get_token(f"{base_url}/.default").token
@@ -496,39 +483,60 @@ def run_paging_demo(label: str, *, top: Optional[int], page_size: Optional[int])
496483
print(f"Retrieve multiple demos failed: {e}")
497484
# 5) Delete record
498485
print("Delete (OData):")
499-
# Show deletes to be executed (concurrently via SDK delete)
486+
# Show deletes to be executed (single + bulk)
500487
if 'record_ids' in locals() and record_ids:
501488
print({"delete_count": len(record_ids)})
502-
pause("Execute Delete (concurrent SDK calls)")
489+
pause("Execute Delete (single then bulk)")
503490
try:
504491
if record_ids:
505-
max_workers = min(8, len(record_ids))
506-
log_call(f"concurrent delete {len(record_ids)} items from '{logical}' (workers={max_workers})")
492+
single_target = record_ids[0]
493+
rest_targets = record_ids[1:]
494+
single_error: Optional[str] = None
495+
bulk_job_id: Optional[str] = None
496+
bulk_error: Optional[str] = None
507497

508-
successes: list[str] = []
509-
failures: list[dict] = []
498+
try:
499+
log_call(f"client.delete('{logical}', '{single_target}')")
500+
backoff_retry(lambda: client.delete(logical, single_target))
501+
except Exception as ex:
502+
single_error = str(ex)
510503

511-
def _del_one(rid: str) -> tuple[str, bool, str | None]:
512-
try:
513-
log_call(f"client.delete('{logical}', '{rid}')")
514-
backoff_retry(lambda: client.delete(logical, rid))
515-
return (rid, True, None)
516-
except Exception as ex:
517-
return (rid, False, str(ex))
518-
519-
with ThreadPoolExecutor(max_workers=max_workers) as executor:
520-
future_map = {executor.submit(_del_one, rid): rid for rid in record_ids}
521-
for fut in as_completed(future_map):
522-
rid, ok, err = fut.result()
523-
if ok:
524-
successes.append(rid)
525-
else:
526-
failures.append({"id": rid, "error": err})
504+
half = max(1, len(rest_targets) // 2)
505+
bulk_targets = rest_targets[:half]
506+
sequential_targets = rest_targets[half:]
507+
bulk_error = None
508+
sequential_error = None
509+
510+
# Fire-and-forget bulk delete for the first portion
511+
try:
512+
log_call(f"client.delete('{logical}', <{len(bulk_targets)} ids>, use_bulk_delete=True)")
513+
bulk_job_id = client.delete(logical, bulk_targets)
514+
except Exception as ex:
515+
bulk_error = str(ex)
516+
517+
# Sequential deletes for the remainder
518+
try:
519+
log_call(f"client.delete('{logical}', <{len(sequential_targets)} ids>, use_bulk_delete=False)")
520+
for rid in sequential_targets:
521+
backoff_retry(lambda rid=rid: client.delete(logical, rid, use_bulk_delete=False))
522+
except Exception as ex:
523+
sequential_error = str(ex)
527524

528525
print({
529526
"entity": logical,
530-
"delete_summary": {"requested": len(record_ids), "success": len(successes), "failures": len(failures)},
531-
"failed": failures[:5], # preview up to 5 failures
527+
"delete_single": {
528+
"id": single_target,
529+
"error": single_error,
530+
},
531+
"delete_bulk": {
532+
"count": len(bulk_targets),
533+
"job_id": bulk_job_id,
534+
"error": bulk_error,
535+
},
536+
"delete_sequential": {
537+
"count": len(sequential_targets),
538+
"error": sequential_error,
539+
},
532540
})
533541
else:
534542
raise RuntimeError("No record created; skipping delete.")
@@ -577,8 +585,22 @@ def _metadata_after_create():
577585
if isinstance(raw_type, str):
578586
attr_type_before = raw_type
579587
lowered = raw_type.lower()
580-
log_call(f"client.delete_column('{entity_schema}', '{scratch_column}')")
581-
column_delete = client.delete_columns(entity_schema, scratch_column)
588+
delete_target = attribute_schema or scratch_column
589+
log_call(f"client.delete_column('{entity_schema}', '{delete_target}')")
590+
591+
def _delete_column():
592+
return client.delete_columns(entity_schema, delete_target)
593+
594+
column_delete = backoff_retry(
595+
_delete_column,
596+
delays=(0, 1, 2, 4, 8),
597+
retry_http_statuses=(),
598+
retry_if=lambda exc: (
599+
isinstance(exc, MetadataError)
600+
or "not found" in str(exc).lower()
601+
or "not yet available" in str(exc).lower()
602+
),
603+
)
582604
if not isinstance(column_delete, list) or not column_delete:
583605
raise RuntimeError("delete_column did not return schema list")
584606
deleted_details = column_delete

src/dataverse_sdk/client.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -201,16 +201,28 @@ def update(self, logical_name: str, ids: Union[str, List[str]], changes: Union[D
201201
od._update_by_ids(logical_name, ids, changes)
202202
return None
203203

204-
def delete(self, logical_name: str, ids: Union[str, List[str]]) -> None:
204+
def delete(
205+
self,
206+
logical_name: str,
207+
ids: Union[str, List[str]],
208+
use_bulk_delete: bool = True,
209+
) -> Optional[str]:
205210
"""
206211
Delete one or more records by GUID.
207212
208213
:param logical_name: Logical (singular) entity name, e.g. ``"account"``.
209214
:type logical_name: str
210215
:param ids: Single GUID string or list of GUID strings to delete.
211216
:type ids: str or list[str]
217+
:param use_bulk_delete: When ``True`` (default) and ``ids`` is a list, execute the BulkDelete action and
218+
return its async job identifier. When ``False`` each record is deleted sequentially.
219+
:type use_bulk_delete: bool
212220
213221
:raises TypeError: If ``ids`` is not str or list[str].
222+
:raises HttpError: If the underlying Web API delete request fails.
223+
224+
:return: BulkDelete job ID when deleting multiple records via BulkDelete; otherwise ``None``.
225+
:rtype: str or None
214226
215227
Example:
216228
Delete a single record::
@@ -219,15 +231,22 @@ def delete(self, logical_name: str, ids: Union[str, List[str]]) -> None:
219231
220232
Delete multiple records::
221233
222-
client.delete("account", [id1, id2, id3])
234+
job_id = client.delete("account", [id1, id2, id3])
223235
"""
224236
od = self._get_odata()
225237
if isinstance(ids, str):
226238
od._delete(logical_name, ids)
227239
return None
228240
if not isinstance(ids, list):
229241
raise TypeError("ids must be str or list[str]")
230-
od._delete_multiple(logical_name, ids)
242+
if not ids:
243+
return None
244+
if not all(isinstance(rid, str) for rid in ids):
245+
raise TypeError("ids must contain string GUIDs")
246+
if use_bulk_delete:
247+
return od._delete_multiple(logical_name, ids)
248+
for rid in ids:
249+
od._delete(logical_name, rid)
231250
return None
232251

233252
def get(

src/dataverse_sdk/odata.py

Lines changed: 63 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import time
77
import re
88
import json
9+
from datetime import datetime, timezone
910
import importlib.resources as ir
1011

1112
from .http import HttpClient
@@ -281,13 +282,68 @@ def _update_by_ids(self, logical_name: str, ids: List[str], changes: Union[Dict[
281282
self._update_multiple(entity_set, logical_name, batch)
282283
return None
283284

284-
def _delete_multiple(self, logical_name: str, ids: List[str]) -> None:
285-
"""Delete many records by GUID list (simple loop; potential future optimization point)."""
286-
if not isinstance(ids, list):
287-
raise TypeError("ids must be list[str]")
288-
for rid in ids:
289-
self._delete(logical_name, rid)
290-
return None
285+
def _delete_multiple(
286+
self,
287+
logical_name: str,
288+
ids: List[str],
289+
) -> Optional[str]:
290+
"""Delete many records by GUID list.
291+
292+
Returns the asynchronous job identifier reported by the BulkDelete action.
293+
"""
294+
targets = [rid for rid in ids if rid]
295+
if not targets:
296+
return None
297+
value_objects = [{"Value": rid, "Type": "System.Guid"} for rid in targets]
298+
299+
pk_attr = self._primary_id_attr(logical_name)
300+
timestamp = datetime.now(timezone.utc).isoformat(timespec="seconds").replace("+00:00", "Z")
301+
job_label = f"Bulk delete {logical_name} records @ {timestamp}"
302+
303+
query = {
304+
"@odata.type": "Microsoft.Dynamics.CRM.QueryExpression",
305+
"EntityName": logical_name,
306+
"ColumnSet": {
307+
"@odata.type": "Microsoft.Dynamics.CRM.ColumnSet",
308+
"AllColumns": False,
309+
"Columns": [],
310+
},
311+
"Criteria": {
312+
"@odata.type": "Microsoft.Dynamics.CRM.FilterExpression",
313+
"FilterOperator": "And",
314+
"Conditions": [
315+
{
316+
"@odata.type": "Microsoft.Dynamics.CRM.ConditionExpression",
317+
"AttributeName": pk_attr,
318+
"Operator": "In",
319+
"Values": value_objects,
320+
}
321+
],
322+
},
323+
}
324+
325+
payload = {
326+
"JobName": job_label,
327+
"SendEmailNotification": False,
328+
"ToRecipients": [],
329+
"CCRecipients": [],
330+
"RecurrencePattern": "",
331+
"StartDateTime": timestamp,
332+
"QuerySet": [query],
333+
}
334+
335+
url = f"{self.api}/BulkDelete"
336+
response = self._request("post", url, json=payload, expected=(200, 202, 204))
337+
338+
job_id = None
339+
try:
340+
body = response.json() if response.text else {}
341+
except ValueError:
342+
body = {}
343+
if isinstance(body, dict):
344+
job_id = body.get("JobId")
345+
346+
return job_id
291347

292348
def _format_key(self, key: str) -> str:
293349
k = key.strip()

0 commit comments

Comments
 (0)