@@ -12,9 +12,19 @@ def format_potential_core_components(leaf_nodes: List[str], components: Dict[str
1212 """
1313 Format the potential core components into a string that can be used in the prompt.
1414 """
15+ # Filter out any invalid leaf nodes that don't exist in components
16+ valid_leaf_nodes = []
17+ for leaf_node in leaf_nodes :
18+ if leaf_node in components :
19+ valid_leaf_nodes .append (leaf_node )
20+ else :
21+ import logging
22+ logger = logging .getLogger (__name__ )
23+ logger .warning (f"Skipping invalid leaf node '{ leaf_node } ' - not found in components" )
24+
1525 #group leaf nodes by file
1626 leaf_nodes_by_file = defaultdict (list )
17- for leaf_node in leaf_nodes :
27+ for leaf_node in valid_leaf_nodes :
1828 leaf_nodes_by_file [components [leaf_node ].relative_path ].append (leaf_node )
1929
2030 potential_core_components = ""
@@ -49,7 +59,27 @@ def cluster_modules(
4959 response = call_llm (prompt , model = MAIN_MODEL )
5060
5161 #parse the response
52- module_tree = eval (response .split ("<GROUPED_COMPONENTS>" )[1 ].split ("</GROUPED_COMPONENTS>" )[0 ])
62+ try :
63+ if "<GROUPED_COMPONENTS>" not in response or "</GROUPED_COMPONENTS>" not in response :
64+ import logging
65+ logger = logging .getLogger (__name__ )
66+ logger .error (f"Invalid LLM response format - missing component tags: { response [:200 ]} ..." )
67+ return {}
68+
69+ response_content = response .split ("<GROUPED_COMPONENTS>" )[1 ].split ("</GROUPED_COMPONENTS>" )[0 ]
70+ module_tree = eval (response_content )
71+
72+ if not isinstance (module_tree , dict ):
73+ import logging
74+ logger = logging .getLogger (__name__ )
75+ logger .error (f"Invalid module tree format - expected dict, got { type (module_tree )} " )
76+ return {}
77+
78+ except Exception as e :
79+ import logging
80+ logger = logging .getLogger (__name__ )
81+ logger .error (f"Failed to parse LLM response: { e } . Response: { response [:200 ]} ..." )
82+ return {}
5383
5484 # check if the module tree is valid
5585 if len (module_tree ) <= 1 :
@@ -66,10 +96,21 @@ def cluster_modules(
6696 value [module_name ] = module_info
6797
6898 for module_name , module_info in module_tree .items ():
69- sub_leaf_nodes = module_info ["components" ]
99+ sub_leaf_nodes = module_info .get ("components" , [])
100+
101+ # Filter sub_leaf_nodes to ensure they exist in components
102+ valid_sub_leaf_nodes = []
103+ for node in sub_leaf_nodes :
104+ if node in components :
105+ valid_sub_leaf_nodes .append (node )
106+ else :
107+ import logging
108+ logger = logging .getLogger (__name__ )
109+ logger .warning (f"Skipping invalid sub leaf node '{ node } ' in module '{ module_name } ' - not found in components" )
110+
70111 current_module_path .append (module_name )
71112 module_info ["children" ] = {}
72- module_info ["children" ] = cluster_modules (sub_leaf_nodes , components , current_module_tree , module_name , current_module_path )
113+ module_info ["children" ] = cluster_modules (valid_sub_leaf_nodes , components , current_module_tree , module_name , current_module_path )
73114 current_module_path .pop ()
74115
75116 return module_tree
0 commit comments