|
7 | 7 | import typing |
8 | 8 |
|
9 | 9 | import click |
| 10 | +import rich |
| 11 | +import rich.box |
10 | 12 | from packaging.requirements import Requirement |
11 | | -from packaging.utils import canonicalize_name |
| 13 | +from packaging.utils import NormalizedName, canonicalize_name |
12 | 14 | from packaging.version import Version |
| 15 | +from rich.table import Table |
13 | 16 |
|
14 | 17 | from fromager import clickext, context |
15 | 18 | from fromager.commands import bootstrap |
@@ -784,3 +787,288 @@ def n2s(nodes: typing.Iterable[DependencyNode]) -> str: |
784 | 787 | topo.done(*nodes_to_build) |
785 | 788 |
|
786 | 789 | print(f"\nBuilding {len(graph)} packages in {rounds} rounds.") |
| 790 | + |
| 791 | + |
| 792 | +def _traverse_edges( |
| 793 | + node: DependencyNode, |
| 794 | +) -> typing.Iterable[DependencyNode]: |
| 795 | + """Iterative DFS over all of a node's dependency edges. |
| 796 | +
|
| 797 | + Yields every reachable ``DependencyNode`` exactly once, excluding the |
| 798 | + starting *node* itself. All edge types are followed (install, build, |
| 799 | + toplevel). |
| 800 | +
|
| 801 | + Args: |
| 802 | + node: The starting node (not yielded). |
| 803 | +
|
| 804 | + Yields: |
| 805 | + Each reachable DependencyNode, in DFS order. |
| 806 | + """ |
| 807 | + visited: set[str] = {node.key} |
| 808 | + stack: list[DependencyNode] = [node] |
| 809 | + while stack: |
| 810 | + current = stack.pop() |
| 811 | + for edge in current.children: |
| 812 | + child_node = edge.destination_node |
| 813 | + if child_node.key in visited: |
| 814 | + continue |
| 815 | + visited.add(child_node.key) |
| 816 | + yield child_node |
| 817 | + stack.append(child_node) |
| 818 | + |
| 819 | + |
| 820 | +def get_dependency_closure(node: DependencyNode) -> set[NormalizedName]: |
| 821 | + """Compute the full dependency closure for a node. |
| 822 | +
|
| 823 | + Traverses all edge types and returns the set of canonical package names |
| 824 | + reachable from *node*, including *node* itself. |
| 825 | +
|
| 826 | + Args: |
| 827 | + node: The starting node to compute the closure for. |
| 828 | +
|
| 829 | + Returns: |
| 830 | + Set of canonicalized package names in the transitive closure. |
| 831 | + """ |
| 832 | + dependency_names: set[NormalizedName] = set() |
| 833 | + if node.canonicalized_name != ROOT: |
| 834 | + dependency_names.add(node.canonicalized_name) |
| 835 | + for dependency in _traverse_edges(node): |
| 836 | + if dependency.canonicalized_name != ROOT: |
| 837 | + dependency_names.add(dependency.canonicalized_name) |
| 838 | + return dependency_names |
| 839 | + |
| 840 | + |
| 841 | +def get_package_names(graph: DependencyGraph) -> set[NormalizedName]: |
| 842 | + """Extract all unique canonical package names from a graph. |
| 843 | +
|
| 844 | + Args: |
| 845 | + graph: The dependency graph to extract names from. |
| 846 | +
|
| 847 | + Returns: |
| 848 | + Set of canonicalized package names, excluding the ROOT node. |
| 849 | + """ |
| 850 | + return { |
| 851 | + node.canonicalized_name for node in graph.get_all_nodes() if node.key != ROOT |
| 852 | + } |
| 853 | + |
| 854 | + |
| 855 | +def extract_collection_name(graph_path: str) -> str: |
| 856 | + """Derive a collection name from a graph file path. |
| 857 | +
|
| 858 | + Uses the filename stem, stripping a trailing ``-graph`` suffix when present. |
| 859 | + For example, ``notebook-graph.json`` becomes ``notebook``. |
| 860 | +
|
| 861 | + Args: |
| 862 | + graph_path: Filesystem path to a graph JSON file. |
| 863 | +
|
| 864 | + Returns: |
| 865 | + A short collection name string. |
| 866 | + """ |
| 867 | + stem = pathlib.PurePath(graph_path).stem |
| 868 | + if stem.endswith("-graph"): |
| 869 | + stem = stem[: -len("-graph")] |
| 870 | + return stem |
| 871 | + |
| 872 | + |
| 873 | +class _CollectionScore(typing.NamedTuple): |
| 874 | + """Overlap score between a package's dependency closure and a collection.""" |
| 875 | + |
| 876 | + collection: str |
| 877 | + new_packages: int |
| 878 | + existing_packages: int |
| 879 | + coverage_percentage: float |
| 880 | + |
| 881 | + |
| 882 | +def _analyze_suggestions( |
| 883 | + toplevel_nodes: list[DependencyNode], |
| 884 | + collection_packages: dict[str, set[NormalizedName]], |
| 885 | +) -> list[dict[str, typing.Any]]: |
| 886 | + """Score each onboarding top-level package against every collection. |
| 887 | +
|
| 888 | + Args: |
| 889 | + toplevel_nodes: Top-level nodes from the onboarding graph. |
| 890 | + collection_packages: Mapping of collection name to its package name set. |
| 891 | +
|
| 892 | + Returns: |
| 893 | + List of result dicts, one per top-level package, sorted by package name. |
| 894 | + """ |
| 895 | + results: list[dict[str, typing.Any]] = [] |
| 896 | + |
| 897 | + for node in sorted(toplevel_nodes, key=lambda n: n.canonicalized_name): |
| 898 | + dependency_names = get_dependency_closure(node) |
| 899 | + total_dependency_count = len(dependency_names) |
| 900 | + |
| 901 | + scores: list[_CollectionScore] = [] |
| 902 | + for collection_name, packages in collection_packages.items(): |
| 903 | + existing_count = len(dependency_names & packages) |
| 904 | + new_count = total_dependency_count - existing_count |
| 905 | + coverage_percentage = ( |
| 906 | + (existing_count / total_dependency_count * 100) |
| 907 | + if total_dependency_count |
| 908 | + else 0.0 |
| 909 | + ) |
| 910 | + scores.append( |
| 911 | + _CollectionScore( |
| 912 | + collection_name, new_count, existing_count, coverage_percentage |
| 913 | + ) |
| 914 | + ) |
| 915 | + |
| 916 | + # Rank: fewest new packages, then highest coverage, then name for determinism |
| 917 | + scores.sort( |
| 918 | + key=lambda score: ( |
| 919 | + score.new_packages, |
| 920 | + -score.coverage_percentage, |
| 921 | + score.collection, |
| 922 | + ) |
| 923 | + ) |
| 924 | + best_score = scores[0] if scores else None |
| 925 | + |
| 926 | + logger.debug( |
| 927 | + "%s: %d deps, best fit '%s' (%d new, %.1f%% coverage)", |
| 928 | + node.canonicalized_name, |
| 929 | + total_dependency_count, |
| 930 | + best_score.collection if best_score else "none", |
| 931 | + best_score.new_packages if best_score else 0, |
| 932 | + best_score.coverage_percentage if best_score else 0.0, |
| 933 | + ) |
| 934 | + |
| 935 | + results.append( |
| 936 | + { |
| 937 | + "package": str(node.canonicalized_name), |
| 938 | + "version": str(node.version), |
| 939 | + "total_dependencies": total_dependency_count, |
| 940 | + "best_fit": best_score.collection if best_score else "none", |
| 941 | + "new_packages": best_score.new_packages if best_score else 0, |
| 942 | + "existing_packages": best_score.existing_packages if best_score else 0, |
| 943 | + "coverage_percentage": round(best_score.coverage_percentage, 1) |
| 944 | + if best_score |
| 945 | + else 0.0, |
| 946 | + "all_collections": [ |
| 947 | + { |
| 948 | + "collection": score.collection, |
| 949 | + "new_packages": score.new_packages, |
| 950 | + "existing_packages": score.existing_packages, |
| 951 | + "coverage_percentage": round(score.coverage_percentage, 1), |
| 952 | + } |
| 953 | + for score in scores |
| 954 | + ], |
| 955 | + } |
| 956 | + ) |
| 957 | + |
| 958 | + return results |
| 959 | + |
| 960 | + |
| 961 | +def _print_suggest_collection_table( |
| 962 | + results: list[dict[str, typing.Any]], |
| 963 | +) -> None: |
| 964 | + """Render suggest-collection results as a Rich table.""" |
| 965 | + table = Table( |
| 966 | + title="Collection Suggestions for Onboarding Packages", |
| 967 | + box=rich.box.MARKDOWN, |
| 968 | + title_justify="left", |
| 969 | + ) |
| 970 | + table.add_column("Package", justify="left", no_wrap=True) |
| 971 | + table.add_column("Version", justify="left", no_wrap=True) |
| 972 | + table.add_column("Total Deps", justify="right", no_wrap=True) |
| 973 | + table.add_column("Best Fit", justify="left", no_wrap=True) |
| 974 | + table.add_column("New Pkgs", justify="right", no_wrap=True) |
| 975 | + table.add_column("Existing", justify="right", no_wrap=True) |
| 976 | + table.add_column("Coverage", justify="right", no_wrap=True) |
| 977 | + |
| 978 | + for result in results: |
| 979 | + table.add_row( |
| 980 | + result["package"], |
| 981 | + result["version"], |
| 982 | + str(result["total_dependencies"]), |
| 983 | + result["best_fit"], |
| 984 | + str(result["new_packages"]), |
| 985 | + str(result["existing_packages"]), |
| 986 | + f"{result['coverage_percentage']:.1f}%", |
| 987 | + ) |
| 988 | + |
| 989 | + rich.get_console().print(table) |
| 990 | + |
| 991 | + |
| 992 | +@graph.command(name="suggest-collection") |
| 993 | +@click.option( |
| 994 | + "--format", |
| 995 | + "output_format", |
| 996 | + type=click.Choice(["table", "json"], case_sensitive=False), |
| 997 | + default="table", |
| 998 | + help="Output format (default: table)", |
| 999 | +) |
| 1000 | +@click.argument("onboarding-graph", type=str) |
| 1001 | +@click.argument("collection-graphs", nargs=-1, required=True, type=str) |
| 1002 | +def suggest_collection( |
| 1003 | + output_format: str, |
| 1004 | + onboarding_graph: str, |
| 1005 | + collection_graphs: tuple[str, ...], |
| 1006 | +) -> None: |
| 1007 | + """Suggest the best-fit collection for each onboarding package. |
| 1008 | +
|
| 1009 | + Analyzes dependency overlap between top-level packages in ONBOARDING_GRAPH |
| 1010 | + and the existing COLLECTION_GRAPHS to recommend where each onboarding |
| 1011 | + package should be placed. |
| 1012 | +
|
| 1013 | + For each top-level package in the onboarding graph, computes the full |
| 1014 | + transitive dependency closure and compares it against every collection. |
| 1015 | + Collections are ranked by fewest new packages required, then by highest |
| 1016 | + dependency coverage. |
| 1017 | +
|
| 1018 | + \b |
| 1019 | + ONBOARDING_GRAPH Path to the onboarding collection graph.json. |
| 1020 | + COLLECTION_GRAPHS One or more paths to existing collection graph.json files. |
| 1021 | + """ |
| 1022 | + try: |
| 1023 | + onboarding = DependencyGraph.from_file(onboarding_graph) |
| 1024 | + except Exception as err: |
| 1025 | + raise click.ClickException( |
| 1026 | + f"Failed to load onboarding graph {onboarding_graph}: {err}" |
| 1027 | + ) from err |
| 1028 | + |
| 1029 | + root = onboarding.get_root_node() |
| 1030 | + |
| 1031 | + toplevel_nodes: list[DependencyNode] = [ |
| 1032 | + edge.destination_node |
| 1033 | + for edge in root.children |
| 1034 | + if edge.req_type == RequirementType.TOP_LEVEL |
| 1035 | + ] |
| 1036 | + |
| 1037 | + if not toplevel_nodes: |
| 1038 | + if output_format == "json": |
| 1039 | + click.echo("[]") |
| 1040 | + else: |
| 1041 | + click.echo("No top-level packages found in onboarding graph.") |
| 1042 | + return |
| 1043 | + |
| 1044 | + logger.info( |
| 1045 | + "Loaded onboarding graph with %d top-level packages", len(toplevel_nodes) |
| 1046 | + ) |
| 1047 | + |
| 1048 | + collection_packages: dict[str, set[NormalizedName]] = {} |
| 1049 | + for graph_path in collection_graphs: |
| 1050 | + collection_name = extract_collection_name(graph_path) |
| 1051 | + if collection_name in collection_packages: |
| 1052 | + raise click.ClickException( |
| 1053 | + f"Duplicate collection name '{collection_name}' from {graph_path}. " |
| 1054 | + "Rename one of the graph files to avoid ambiguity." |
| 1055 | + ) |
| 1056 | + try: |
| 1057 | + collection_graph = DependencyGraph.from_file(graph_path) |
| 1058 | + except Exception as err: |
| 1059 | + raise click.ClickException( |
| 1060 | + f"Failed to load collection graph {graph_path}: {err}" |
| 1061 | + ) from err |
| 1062 | + collection_packages[collection_name] = get_package_names(collection_graph) |
| 1063 | + logger.debug( |
| 1064 | + "Collection '%s': %d packages", |
| 1065 | + collection_name, |
| 1066 | + len(collection_packages[collection_name]), |
| 1067 | + ) |
| 1068 | + |
| 1069 | + results = _analyze_suggestions(toplevel_nodes, collection_packages) |
| 1070 | + |
| 1071 | + if output_format == "json": |
| 1072 | + click.echo(json.dumps(results, indent=2)) |
| 1073 | + else: |
| 1074 | + _print_suggest_collection_table(results) |
0 commit comments