@@ -171,8 +171,7 @@ def __init__(
171171 self ._logical_to_entityset_cache : dict [str , str ] = {}
172172 # Cache: normalized table_schema_name (lowercase) -> primary id attribute (e.g. accountid)
173173 self ._logical_primaryid_cache : dict [str , str ] = {}
174- # Picklist label cache: (normalized_table_schema_name, normalized_attribute) -> {'map': {...}, 'ts': epoch_seconds}
175- self ._picklist_label_cache = {}
174+ self ._picklist_label_cache : dict [str , dict ] = {}
176175 self ._picklist_cache_ttl_seconds = 3600 # 1 hour TTL
177176
178177 @contextmanager
@@ -1134,141 +1133,118 @@ def _normalize_picklist_label(self, label: str) -> str:
11341133 norm = re .sub (r"\s+" , " " , norm ).strip ().lower ()
11351134 return norm
11361135
1137- def _optionset_map (self , table_schema_name : str , attr_logical : str ) -> Optional [Dict [str , int ]]:
1138- """Build or return cached mapping of normalized label -> value for a picklist attribute.
1139-
1140- Returns empty dict if attribute is not a picklist or has no options. Returns None only
1141- for invalid inputs or unexpected metadata parse failures.
1142-
1143- Notes
1144- -----
1145- - This method calls the Web API twice per attribute so it could have perf impact when there are lots of columns on the entity.
1146- """
1147- if not table_schema_name or not attr_logical :
1148- return None
1149- # Normalize cache key for case-insensitive lookups
1150- cache_key = (self ._normalize_cache_key (table_schema_name ), self ._normalize_cache_key (attr_logical ))
1151- now = time .time ()
1152- entry = self ._picklist_label_cache .get (cache_key )
1153- if isinstance (entry , dict ) and "map" in entry and (now - entry .get ("ts" , 0 )) < self ._picklist_cache_ttl_seconds :
1154- return entry ["map" ]
1155-
1156- # LogicalNames in Dataverse are stored in lowercase, so we need to lowercase for filters
1157- attr_esc = self ._escape_odata_quotes (attr_logical .lower ())
1158- table_schema_name_esc = self ._escape_odata_quotes (table_schema_name .lower ())
1159-
1160- # Step 1: lightweight fetch (no expand) to determine attribute type
1161- url_type = (
1162- f"{ self .api } /EntityDefinitions(LogicalName='{ table_schema_name_esc } ')/Attributes"
1163- f"?$filter=LogicalName eq '{ attr_esc } '&$select=LogicalName,AttributeType"
1164- )
1165- # Retry on 404 (metadata not yet published) before surfacing the error.
1166- r_type = None
def _request_metadata_with_retry(self, method: str, url: str, **kwargs):
    """Issue a metadata request, retrying with backoff while it fails with 404.

    The call is delegated to ``self._request(method, url, **kwargs)``. An
    ``HttpError`` whose ``status_code`` attribute equals 404 is treated as
    transient (presumably metadata that has not finished publishing yet —
    confirm against the service behavior) and retried; any other
    ``HttpError`` propagates immediately.

    Parameters
    ----------
    method:
        HTTP method name forwarded unchanged to ``self._request``.
    url:
        Request URL; also embedded in the final error message.
    **kwargs:
        Extra keyword arguments forwarded unchanged to ``self._request``.

    Returns
    -------
    Whatever ``self._request`` returns for the first successful attempt.

    Raises
    ------
    RuntimeError
        When all five attempts fail with a 404 (chained to the last error).
    HttpError
        For any failure whose status code is not 404.
    """
    max_attempts = 5
    backoff_seconds = 0.4
    for attempt in range(1, max_attempts + 1):
        try:
            return self._request(method, url, **kwargs)
        except HttpError as err:
            # Only 404 is considered retryable; everything else surfaces as-is.
            if getattr(err, "status_code", None) != 404:
                raise
            if attempt == max_attempts:
                raise RuntimeError(f"Metadata request failed after {max_attempts} retries (404): {url}") from err
            # Exponential backoff between attempts: 0.4s, 0.8s, 1.6s, 3.2s.
            time.sleep(backoff_seconds * (2 ** (attempt - 1)))
11851150
1186- body_type = r_type .json ()
1187- items = body_type .get ("value" , []) if isinstance (body_type , dict ) else []
1188- if not items :
1189- return None
1190- attr_md = items [0 ]
1191- if attr_md .get ("AttributeType" ) not in ("Picklist" , "PickList" ):
1192- self ._picklist_label_cache [cache_key ] = {"map" : {}, "ts" : now }
1193- return {}
1194-
1195- # Step 2: fetch with expand only now that we know it's a picklist
1196- # Need to cast to the derived PicklistAttributeMetadata type; OptionSet is not a nav on base AttributeMetadata.
1197- cast_url = (
1198- f"{ self .api } /EntityDefinitions(LogicalName='{ table_schema_name_esc } ')/Attributes(LogicalName='{ attr_esc } ')/"
1199- "Microsoft.Dynamics.CRM.PicklistAttributeMetadata?$select=LogicalName&$expand=OptionSet($select=Options)"
def _bulk_fetch_picklists(self, table_schema_name: str) -> None:
    """Fetch every picklist attribute of a table, with options, and cache the result.

    Issues one GET against the table's ``Attributes`` collection cast to
    ``Microsoft.Dynamics.CRM.PicklistAttributeMetadata`` with the ``OptionSet``
    expanded, then stores the parsed result in ``self._picklist_label_cache``
    under the normalized table key as::

        {"ts": <epoch seconds>, "picklists": {<attribute logical name, lowercase>:
                                              {<normalized label>: <int value>}}}

    If a cache entry for the table exists and is younger than
    ``self._picklist_cache_ttl_seconds``, the method returns without any request.

    Only the ``value`` array of the single response is read — this relies on
    the Dataverse metadata API not paging results (TODO confirm).

    Parameters
    ----------
    table_schema_name:
        Table name; lowercased and quote-escaped before being placed in the URL.

    Notes
    -----
    - Malformed pieces of the payload (non-dict items, missing or non-string
      ``LogicalName``, non-int option values, non-dict labels) are skipped
      rather than raising.
    - When several localized labels normalize to the same text, the first
      value encountered is kept.
    """
    table_key = self._normalize_cache_key(table_schema_name)
    now = time.time()
    table_entry = self._picklist_label_cache.get(table_key)
    if isinstance(table_entry, dict) and (now - table_entry.get("ts", 0)) < self._picklist_cache_ttl_seconds:
        return

    table_esc = self._escape_odata_quotes(table_schema_name.lower())
    url = (
        f"{self.api}/EntityDefinitions(LogicalName='{table_esc}')"
        "/Attributes/Microsoft.Dynamics.CRM.PicklistAttributeMetadata"
        "?$select=LogicalName&$expand=OptionSet($select=Options)"
    )
    response = self._request_metadata_with_retry("get", url)
    body = response.json()
    items = body.get("value", []) if isinstance(body, dict) else []
    if not isinstance(items, list):
        # A null or otherwise non-list "value" carries no attributes to parse.
        items = []

    picklists: Dict[str, Dict[str, int]] = {}
    for item in items:
        if not isinstance(item, dict):
            continue
        logical_name = item.get("LogicalName")
        # The key may be present but null; calling .lower() on it would raise.
        if not isinstance(logical_name, str) or not logical_name:
            continue
        option_set = item.get("OptionSet")
        options = option_set.get("Options") if isinstance(option_set, dict) else None
        mapping: Dict[str, int] = {}
        for opt in options if isinstance(options, list) else ():
            if not isinstance(opt, dict):
                continue
            val = opt.get("Value")
            if not isinstance(val, int):
                continue
            label_def = opt.get("Label")
            locs = label_def.get("LocalizedLabels") if isinstance(label_def, dict) else None
            if not isinstance(locs, list):
                continue
            for loc in locs:
                lab = loc.get("Label") if isinstance(loc, dict) else None
                if isinstance(lab, str) and lab.strip():
                    # setdefault: the first value seen for a normalized label wins.
                    mapping.setdefault(self._normalize_picklist_label(lab), val)
        # Attributes without usable options are still recorded (empty mapping).
        picklists[logical_name.lower()] = mapping

    self._picklist_label_cache[table_key] = {"ts": now, "picklists": picklists}
12501204
def _convert_labels_to_ints(self, table_schema_name: str, record: Dict[str, Any]) -> Dict[str, Any]:
    """Return a shallow copy of ``record`` with picklist labels replaced by option ints.

    Heuristic: every non-blank string value stored under a string key that does
    not contain ``"@odata."`` is a candidate. Each candidate is looked up in the
    table's cached picklist metadata; when the attribute is a picklist and the
    normalized label is known, the value in the copy is replaced by the option's
    integer value. Everything else is left unchanged.

    If the record has no candidates, no metadata is fetched. Otherwise
    ``self._bulk_fetch_picklists`` is called once for the table and all lookups
    are served from ``self._picklist_label_cache``.

    Parameters
    ----------
    table_schema_name:
        Table whose picklist metadata is used for resolution.
    record:
        Attribute name -> value mapping; it is never mutated.

    Returns
    -------
    A new dict with the same keys as ``record``.
    """
    resolved_record = record.copy()

    # One predicate decides candidacy for both the "anything to do?" check and
    # the resolution pass, so keys that are not strings or are OData
    # annotations never reach the cache-key normalizer.
    candidates = [
        (key, value)
        for key, value in record.items()
        if isinstance(key, str) and "@odata." not in key and isinstance(value, str) and value.strip()
    ]
    if not candidates:
        return resolved_record

    # Warm (or reuse) the per-table cache; at most one API call per TTL window.
    self._bulk_fetch_picklists(table_schema_name)

    table_entry = self._picklist_label_cache.get(self._normalize_cache_key(table_schema_name))
    picklists = table_entry.get("picklists") if isinstance(table_entry, dict) else None
    if not isinstance(picklists, dict):
        return resolved_record

    for key, value in candidates:
        mapping = picklists.get(self._normalize_cache_key(key))
        # Missing or empty mapping: not a picklist, or it has no options.
        if not isinstance(mapping, dict) or not mapping:
            continue
        option_value = mapping.get(self._normalize_picklist_label(value))
        if option_value is not None:
            resolved_record[key] = option_value
    return resolved_record
12721248
12731249 def _attribute_payload (
12741250 self , column_schema_name : str , dtype : Any , * , is_primary_name : bool = False
0 commit comments