Skip to content

Commit 78cd852

Browse files
abelmilash-msft, Abel Milash, and claude
authored
Optimize picklist label resolution with bulk PicklistAttributeMetadata fetch (#154)
## Summary

Reduces API calls during picklist label-to-integer resolution by fetching all picklist attributes and their options for the entire table in a single API call using the `PicklistAttributeMetadata` cast, instead of checking each attribute individually. Results are cached with a 1-hour TTL.

## Changes

**`src/PowerPlatform/Dataverse/data/_odata.py`**

- Add `_bulk_fetch_picklists()` — single API call to fetch all picklist attributes and their options for a table
- Add `_request_metadata_with_retry()` — exponential backoff on transient metadata errors (HTTP 404 responses)
- Simplify `_convert_labels_to_ints()` — calls `_bulk_fetch_picklists` then resolves labels from cache

**`tests/unit/data/test_odata_internal.py`**

- Rewrite `TestPicklistLabelResolution` class with 50 unit tests covering `_bulk_fetch_picklists`, `_request_metadata_with_retry`, `_convert_labels_to_ints`, integration through `_create`/`_update`/`_upsert`, and edge cases

**`examples/advanced/walkthrough.py`**

- Add picklist label update test to Section 10 (verifies both create and update with string labels)

## Performance impact

Cold cache API calls reduced from `n + p` to always `1`, where `p` = picklist fields, `n` = string fields.

| Picklist Columns | Before Calls | Before Time | After Calls | After Time | Speedup |
|------------------|--------------|-------------|-------------|------------|---------|
| 1                | 2            | 0.6s        | 1           | 0.3s       | 2x      |
| 10               | 11           | 3.3s        | 1           | 0.4s       | 9x      |
| 100              | 101          | 34s         | 1           | 0.6s       | 55x     |
| 250              | 251          | 79s         | 1           | 1.2s       | 64x     |
| 400              | 401          | 119s        | 1           | 1.3s       | 92x     |

Repeat operations use a 1-hour TTL cache (0 API calls, <5ms).

## Testing

- 660 unit tests passing
- Performance benchmarks verified against live Dataverse environment (5 runs each)

---------

Co-authored-by: Abel Milash <abelmilash@microsoft.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 5cd086c commit 78cd852

File tree

4 files changed

+858
-119
lines changed

4 files changed

+858
-119
lines changed

examples/advanced/walkthrough.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,16 @@ def _run_walkthrough(client):
445445
print(f" new_Priority stored as integer: {retrieved.get('new_priority')}")
446446
print(f" new_Priority@FormattedValue: {retrieved.get('new_priority@OData.Community.Display.V1.FormattedValue')}")
447447

448+
# Update with a string label
449+
log_call(f"client.records.update('{table_name}', label_id, {{'new_Priority': 'Low'}})")
450+
backoff(lambda: client.records.update(table_name, label_id, {"new_Priority": "Low"}))
451+
updated_label = backoff(lambda: client.records.get(table_name, label_id))
452+
print(f"[OK] Updated record with string label 'Low' for new_Priority")
453+
print(f" new_Priority stored as integer: {updated_label.get('new_priority')}")
454+
print(
455+
f" new_Priority@FormattedValue: {updated_label.get('new_priority@OData.Community.Display.V1.FormattedValue')}"
456+
)
457+
448458
# ============================================================================
449459
# 11. COLUMN MANAGEMENT
450460
# ============================================================================

src/PowerPlatform/Dataverse/data/_odata.py

Lines changed: 83 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -171,8 +171,7 @@ def __init__(
171171
self._logical_to_entityset_cache: dict[str, str] = {}
172172
# Cache: normalized table_schema_name (lowercase) -> primary id attribute (e.g. accountid)
173173
self._logical_primaryid_cache: dict[str, str] = {}
174-
# Picklist label cache: (normalized_table_schema_name, normalized_attribute) -> {'map': {...}, 'ts': epoch_seconds}
175-
self._picklist_label_cache = {}
174+
self._picklist_label_cache: dict[str, dict] = {}
176175
self._picklist_cache_ttl_seconds = 3600 # 1 hour TTL
177176

178177
@contextmanager
@@ -1134,141 +1133,118 @@ def _normalize_picklist_label(self, label: str) -> str:
11341133
norm = re.sub(r"\s+", " ", norm).strip().lower()
11351134
return norm
11361135

1137-
def _optionset_map(self, table_schema_name: str, attr_logical: str) -> Optional[Dict[str, int]]:
1138-
"""Build or return cached mapping of normalized label -> value for a picklist attribute.
1139-
1140-
Returns empty dict if attribute is not a picklist or has no options. Returns None only
1141-
for invalid inputs or unexpected metadata parse failures.
1142-
1143-
Notes
1144-
-----
1145-
- This method calls the Web API twice per attribute so it could have perf impact when there are lots of columns on the entity.
1146-
"""
1147-
if not table_schema_name or not attr_logical:
1148-
return None
1149-
# Normalize cache key for case-insensitive lookups
1150-
cache_key = (self._normalize_cache_key(table_schema_name), self._normalize_cache_key(attr_logical))
1151-
now = time.time()
1152-
entry = self._picklist_label_cache.get(cache_key)
1153-
if isinstance(entry, dict) and "map" in entry and (now - entry.get("ts", 0)) < self._picklist_cache_ttl_seconds:
1154-
return entry["map"]
1155-
1156-
# LogicalNames in Dataverse are stored in lowercase, so we need to lowercase for filters
1157-
attr_esc = self._escape_odata_quotes(attr_logical.lower())
1158-
table_schema_name_esc = self._escape_odata_quotes(table_schema_name.lower())
1159-
1160-
# Step 1: lightweight fetch (no expand) to determine attribute type
1161-
url_type = (
1162-
f"{self.api}/EntityDefinitions(LogicalName='{table_schema_name_esc}')/Attributes"
1163-
f"?$filter=LogicalName eq '{attr_esc}'&$select=LogicalName,AttributeType"
1164-
)
1165-
# Retry on 404 (metadata not yet published) before surfacing the error.
1166-
r_type = None
1136+
def _request_metadata_with_retry(self, method: str, url: str, **kwargs):
1137+
"""Fetch metadata with retries on transient errors."""
11671138
max_attempts = 5
11681139
backoff_seconds = 0.4
11691140
for attempt in range(1, max_attempts + 1):
11701141
try:
1171-
r_type = self._request("get", url_type)
1172-
break
1142+
return self._request(method, url, **kwargs)
11731143
except HttpError as err:
11741144
if getattr(err, "status_code", None) == 404:
11751145
if attempt < max_attempts:
1176-
# Exponential backoff: 0.4s, 0.8s, 1.6s, 3.2s
11771146
time.sleep(backoff_seconds * (2 ** (attempt - 1)))
11781147
continue
1179-
raise RuntimeError(
1180-
f"Picklist attribute metadata not found after retries: entity='{table_schema_name}' attribute='{attr_logical}' (404)"
1181-
) from err
1148+
raise RuntimeError(f"Metadata request failed after {max_attempts} retries (404): {url}") from err
11821149
raise
1183-
if r_type is None:
1184-
raise RuntimeError("Failed to retrieve attribute metadata due to repeated request failures.")
11851150

1186-
body_type = r_type.json()
1187-
items = body_type.get("value", []) if isinstance(body_type, dict) else []
1188-
if not items:
1189-
return None
1190-
attr_md = items[0]
1191-
if attr_md.get("AttributeType") not in ("Picklist", "PickList"):
1192-
self._picklist_label_cache[cache_key] = {"map": {}, "ts": now}
1193-
return {}
1194-
1195-
# Step 2: fetch with expand only now that we know it's a picklist
1196-
# Need to cast to the derived PicklistAttributeMetadata type; OptionSet is not a nav on base AttributeMetadata.
1197-
cast_url = (
1198-
f"{self.api}/EntityDefinitions(LogicalName='{table_schema_name_esc}')/Attributes(LogicalName='{attr_esc}')/"
1199-
"Microsoft.Dynamics.CRM.PicklistAttributeMetadata?$select=LogicalName&$expand=OptionSet($select=Options)"
1151+
def _bulk_fetch_picklists(self, table_schema_name: str) -> None:
1152+
"""Fetch all picklist attributes and their options for a table in one API call.
1153+
1154+
Uses collection-level PicklistAttributeMetadata cast to retrieve every picklist
1155+
attribute on the table, including its OptionSet options. Populates the nested
1156+
cache so that ``_convert_labels_to_ints`` resolves labels without further API calls.
1157+
The Dataverse metadata API does not page results.
1158+
"""
1159+
table_key = self._normalize_cache_key(table_schema_name)
1160+
now = time.time()
1161+
table_entry = self._picklist_label_cache.get(table_key)
1162+
if isinstance(table_entry, dict) and (now - table_entry.get("ts", 0)) < self._picklist_cache_ttl_seconds:
1163+
return
1164+
1165+
table_esc = self._escape_odata_quotes(table_schema_name.lower())
1166+
url = (
1167+
f"{self.api}/EntityDefinitions(LogicalName='{table_esc}')"
1168+
f"/Attributes/Microsoft.Dynamics.CRM.PicklistAttributeMetadata"
1169+
f"?$select=LogicalName&$expand=OptionSet($select=Options)"
12001170
)
1201-
# Step 2 fetch with retries: expanded OptionSet (cast form first)
1202-
r_opts = None
1203-
for attempt in range(1, max_attempts + 1):
1204-
try:
1205-
r_opts = self._request("get", cast_url)
1206-
break
1207-
except HttpError as err:
1208-
if getattr(err, "status_code", None) == 404:
1209-
if attempt < max_attempts:
1210-
time.sleep(backoff_seconds * (2 ** (attempt - 1)))
1211-
continue
1212-
raise RuntimeError(
1213-
f"Picklist OptionSet metadata not found after retries: entity='{table_schema_name}' attribute='{attr_logical}' (404)"
1214-
) from err
1215-
raise
1216-
if r_opts is None:
1217-
raise RuntimeError("Failed to retrieve picklist OptionSet metadata due to repeated request failures.")
1171+
response = self._request_metadata_with_retry("get", url)
1172+
body = response.json()
1173+
items = body.get("value", []) if isinstance(body, dict) else []
12181174

1219-
attr_full = {}
1220-
try:
1221-
attr_full = r_opts.json() if r_opts.text else {}
1222-
except ValueError:
1223-
return None
1224-
option_set = attr_full.get("OptionSet") or {}
1225-
options = option_set.get("Options") if isinstance(option_set, dict) else None
1226-
if not isinstance(options, list):
1227-
return None
1228-
mapping: Dict[str, int] = {}
1229-
for opt in options:
1230-
if not isinstance(opt, dict):
1175+
picklists: Dict[str, Dict[str, int]] = {}
1176+
for item in items:
1177+
if not isinstance(item, dict):
12311178
continue
1232-
val = opt.get("Value")
1233-
if not isinstance(val, int):
1179+
ln = item.get("LogicalName", "").lower()
1180+
if not ln:
12341181
continue
1235-
label_def = opt.get("Label") or {}
1236-
locs = label_def.get("LocalizedLabels")
1237-
if isinstance(locs, list):
1238-
for loc in locs:
1239-
if isinstance(loc, dict):
1240-
lab = loc.get("Label")
1241-
if isinstance(lab, str) and lab.strip():
1242-
normalized = self._normalize_picklist_label(lab)
1243-
mapping.setdefault(normalized, val)
1244-
if mapping:
1245-
self._picklist_label_cache[cache_key] = {"map": mapping, "ts": now}
1246-
return mapping
1247-
# No options available
1248-
self._picklist_label_cache[cache_key] = {"map": {}, "ts": now}
1249-
return {}
1182+
option_set = item.get("OptionSet") or {}
1183+
options = option_set.get("Options") if isinstance(option_set, dict) else None
1184+
mapping: Dict[str, int] = {}
1185+
if isinstance(options, list):
1186+
for opt in options:
1187+
if not isinstance(opt, dict):
1188+
continue
1189+
val = opt.get("Value")
1190+
if not isinstance(val, int):
1191+
continue
1192+
label_def = opt.get("Label") or {}
1193+
locs = label_def.get("LocalizedLabels")
1194+
if isinstance(locs, list):
1195+
for loc in locs:
1196+
if isinstance(loc, dict):
1197+
lab = loc.get("Label")
1198+
if isinstance(lab, str) and lab.strip():
1199+
normalized = self._normalize_picklist_label(lab)
1200+
mapping.setdefault(normalized, val)
1201+
picklists[ln] = mapping
1202+
1203+
self._picklist_label_cache[table_key] = {"ts": now, "picklists": picklists}
12501204

12511205
def _convert_labels_to_ints(self, table_schema_name: str, record: Dict[str, Any]) -> Dict[str, Any]:
12521206
"""Return a copy of record with any labels converted to option ints.
12531207
12541208
Heuristic: For each string value, attempt to resolve against picklist metadata.
12551209
If attribute isn't a picklist or label not found, value left unchanged.
1210+
1211+
On first encounter of a table, bulk-fetches all picklist attributes and
1212+
their options in a single API call, then resolves labels from the warm cache.
12561213
"""
1257-
out = record.copy()
1258-
for k, v in list(out.items()):
1214+
resolved_record = record.copy()
1215+
1216+
# Check if there are any string-valued candidates worth resolving
1217+
has_candidates = any(
1218+
isinstance(v, str) and v.strip() and isinstance(k, str) and "@odata." not in k
1219+
for k, v in resolved_record.items()
1220+
)
1221+
if not has_candidates:
1222+
return resolved_record
1223+
1224+
# Bulk-fetch all picklists for this table (1 API call, cached for TTL)
1225+
self._bulk_fetch_picklists(table_schema_name)
1226+
1227+
# Resolve labels from the nested cache
1228+
table_key = self._normalize_cache_key(table_schema_name)
1229+
table_entry = self._picklist_label_cache.get(table_key)
1230+
if not isinstance(table_entry, dict):
1231+
return resolved_record
1232+
picklists = table_entry.get("picklists", {})
1233+
1234+
for k, v in resolved_record.items():
12591235
if not isinstance(v, str) or not v.strip():
12601236
continue
1261-
# Skip OData annotations — they are not attribute names
12621237
if isinstance(k, str) and "@odata." in k:
12631238
continue
1264-
mapping = self._optionset_map(table_schema_name, k)
1265-
if not mapping:
1239+
attr_key = self._normalize_cache_key(k)
1240+
mapping = picklists.get(attr_key)
1241+
if not isinstance(mapping, dict) or not mapping:
12661242
continue
12671243
norm = self._normalize_picklist_label(v)
12681244
val = mapping.get(norm)
12691245
if val is not None:
1270-
out[k] = val
1271-
return out
1246+
resolved_record[k] = val
1247+
return resolved_record
12721248

12731249
def _attribute_payload(
12741250
self, column_schema_name: str, dtype: Any, *, is_primary_name: bool = False

0 commit comments

Comments
 (0)