|
7 | 7 | import typing |
8 | 8 |
|
9 | 9 | import click |
| 10 | +import rich |
| 11 | +import rich.box |
10 | 12 | from packaging.requirements import Requirement |
11 | | -from packaging.utils import canonicalize_name |
| 13 | +from packaging.utils import NormalizedName, canonicalize_name |
12 | 14 | from packaging.version import Version |
| 15 | +from rich.table import Table |
13 | 16 |
|
14 | 17 | from fromager import clickext, context |
15 | 18 | from fromager.commands import bootstrap |
@@ -784,3 +787,252 @@ def n2s(nodes: typing.Iterable[DependencyNode]) -> str: |
784 | 787 | topo.done(*nodes_to_build) |
785 | 788 |
|
786 | 789 | print(f"\nBuilding {len(graph)} packages in {rounds} rounds.") |
| 790 | + |
| 791 | + |
def get_dependency_closure(node: DependencyNode) -> set[NormalizedName]:
    """Collect the canonical names of ``node`` and of all its dependencies.

    Names are gathered from ``node`` itself and from every node yielded by
    ``node.iter_all_dependencies()``. Entries whose canonical name equals
    ``ROOT`` are left out of the result.

    Args:
        node: The node whose dependency closure is computed.

    Returns:
        Set of canonicalized package names. ``node`` is part of the set
        unless its own name is ``ROOT``.
    """
    closure: set[NormalizedName] = {
        dependency.canonicalized_name
        for dependency in node.iter_all_dependencies()
        if dependency.canonicalized_name != ROOT
    }
    # The starting node counts as a member of its own closure.
    if node.canonicalized_name != ROOT:
        closure.add(node.canonicalized_name)
    return closure
| 811 | + |
| 812 | + |
def get_package_names(graph: DependencyGraph) -> set[NormalizedName]:
    """Return the distinct canonical package names stored in ``graph``.

    Args:
        graph: Dependency graph whose nodes are inspected.

    Returns:
        Set of canonicalized names of every node returned by
        ``graph.get_all_nodes()``, skipping the node whose key is ``ROOT``.
    """
    names: set[NormalizedName] = set()
    for node in graph.get_all_nodes():
        # The synthetic root entry is not a real package.
        if node.key == ROOT:
            continue
        names.add(node.canonicalized_name)
    return names
| 825 | + |
| 826 | + |
def extract_collection_name(graph_path: str) -> str:
    """Turn a graph file path into a collection name.

    The name is the final path component with its last suffix removed, so
    ``collections/foo.json`` yields ``foo`` and ``foo.tar.gz`` yields
    ``foo.tar``. The path is handled purely as text; the filesystem is not
    touched.

    Args:
        graph_path: Path to a graph file.

    Returns:
        The stem of the final path component.
    """
    graph_file = pathlib.PurePath(graph_path)
    return graph_file.stem
| 839 | + |
| 840 | + |
| 841 | +class _CollectionScore(typing.NamedTuple): |
| 842 | + """Overlap score between a package's dependency closure and a collection.""" |
| 843 | + |
| 844 | + collection: str |
| 845 | + new_packages: int |
| 846 | + existing_packages: int |
| 847 | + coverage_percentage: float |
| 848 | + |
| 849 | + |
def _analyze_suggestions(
    toplevel_nodes: list[DependencyNode],
    collection_packages: dict[str, set[NormalizedName]],
) -> list[dict[str, typing.Any]]:
    """Rank every collection for each top-level onboarding package.

    For each node, the dependency closure is intersected with each
    collection's package set. Collections are ordered by fewest packages
    missing from the collection, then by highest coverage, then by name.
    The first entry of that ordering is reported as the best fit.

    Args:
        toplevel_nodes: Top-level nodes from the onboarding graph.
        collection_packages: Mapping of collection name to its package names.

    Returns:
        One dict per top-level package, ordered by canonical package name.
        Each dict carries the best-fit summary plus the full ranking under
        ``all_collections``. When there are no collections, the best fit is
        reported as ``"none"`` with zero counts.
    """
    report: list[dict[str, typing.Any]] = []
    ordered_nodes = sorted(toplevel_nodes, key=lambda n: n.canonicalized_name)

    for node in ordered_nodes:
        closure = get_dependency_closure(node)
        closure_size = len(closure)

        ranking: list[_CollectionScore] = []
        for name, members in collection_packages.items():
            shared = len(closure & members)
            if closure_size:
                coverage = shared / closure_size * 100
            else:
                # Avoid dividing by zero for an empty closure.
                coverage = 0.0
            ranking.append(
                _CollectionScore(name, closure_size - shared, shared, coverage)
            )

        # Name is the last sort key so that ties resolve deterministically.
        ranking.sort(
            key=lambda entry: (
                entry.new_packages,
                -entry.coverage_percentage,
                entry.collection,
            )
        )

        if ranking:
            best_name, best_new, best_existing, best_coverage = ranking[0]
        else:
            best_name, best_new, best_existing, best_coverage = "none", 0, 0, 0.0

        logger.debug(
            "%s: %d deps, best fit '%s' (%d new, %.1f%% coverage)",
            node.canonicalized_name,
            closure_size,
            best_name,
            best_new,
            best_coverage,
        )

        all_collections = []
        for entry in ranking:
            all_collections.append(
                {
                    "collection": entry.collection,
                    "new_packages": entry.new_packages,
                    "existing_packages": entry.existing_packages,
                    "coverage_percentage": round(entry.coverage_percentage, 1),
                }
            )

        report.append(
            {
                "package": str(node.canonicalized_name),
                "version": str(node.version),
                "total_dependencies": closure_size,
                "best_fit": best_name,
                "new_packages": best_new,
                "existing_packages": best_existing,
                "coverage_percentage": round(best_coverage, 1),
                "all_collections": all_collections,
            }
        )

    return report
| 927 | + |
| 928 | + |
def _print_suggest_collection_table(
    results: list[dict[str, typing.Any]],
) -> None:
    """Print the suggest-collection results as a Markdown-style Rich table.

    Args:
        results: Result dicts with the keys ``package``, ``version``,
            ``total_dependencies``, ``best_fit``, ``new_packages``,
            ``existing_packages`` and ``coverage_percentage``.
    """
    # (header, justification) for each column, in display order.
    column_specs: tuple[tuple[str, typing.Literal["left", "right"]], ...] = (
        ("Package", "left"),
        ("Version", "left"),
        ("Total Deps", "right"),
        ("Best Fit", "left"),
        ("New Pkgs", "right"),
        ("Existing", "right"),
        ("Coverage", "right"),
    )

    table = Table(
        title="Collection Suggestions for Onboarding Packages",
        box=rich.box.MARKDOWN,
        title_justify="left",
    )
    for header, justify in column_specs:
        table.add_column(header, justify=justify, no_wrap=True)

    for entry in results:
        cells = (
            entry["package"],
            entry["version"],
            str(entry["total_dependencies"]),
            entry["best_fit"],
            str(entry["new_packages"]),
            str(entry["existing_packages"]),
            f"{entry['coverage_percentage']:.1f}%",
        )
        table.add_row(*cells)

    console = rich.get_console()
    console.print(table)
| 958 | + |
| 959 | + |
@graph.command(name="suggest-collection")
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["table", "json"], case_sensitive=False),
    default="table",
    help="Output format (default: table)",
)
@click.argument("onboarding-graph", type=str)
@click.argument("collection-graphs", nargs=-1, required=True, type=str)
def suggest_collection(
    output_format: str,
    onboarding_graph: str,
    collection_graphs: tuple[str, ...],
) -> None:
    """Suggest the best-fit collection for each onboarding package.

    Analyzes dependency overlap between top-level packages in ONBOARDING_GRAPH
    and the existing COLLECTION_GRAPHS to recommend where each onboarding
    package should be placed.

    For each top-level package in the onboarding graph, computes the full
    transitive dependency closure and compares it against every collection.
    Collections are ranked by fewest new packages required, then by highest
    dependency coverage.

    \b
    ONBOARDING_GRAPH Path to the onboarding collection graph.json.
    COLLECTION_GRAPHS One or more paths to existing collection graph.json files.
    """
    # Any load failure is reported as a ClickException that names the file.
    try:
        onboarding = DependencyGraph.from_file(onboarding_graph)
    except Exception as err:
        raise click.ClickException(
            f"Failed to load onboarding graph {onboarding_graph}: {err}"
        ) from err

    # Only the root's direct TOP_LEVEL edges identify onboarding packages.
    toplevel_nodes: list[DependencyNode] = []
    for edge in onboarding.get_root_node().children:
        if edge.req_type == RequirementType.TOP_LEVEL:
            toplevel_nodes.append(edge.destination_node)

    if not toplevel_nodes:
        # NOTE(review): execution continues after this warning, so the
        # collections are still loaded and an empty result is still rendered
        # -- confirm this is intended.
        click.echo("No top-level packages found in onboarding graph.", err=True)

    logger.info(
        "Loaded onboarding graph with %d top-level packages", len(toplevel_nodes)
    )

    collection_packages: dict[str, set[NormalizedName]] = {}
    for graph_path in collection_graphs:
        collection_name = extract_collection_name(graph_path)
        # Two files with the same stem would overwrite each other's entry.
        if collection_name in collection_packages:
            raise click.ClickException(
                f"Duplicate collection name '{collection_name}' from {graph_path}. "
                "Rename one of the graph files to avoid ambiguity."
            )
        try:
            loaded_graph = DependencyGraph.from_file(graph_path)
        except Exception as err:
            raise click.ClickException(
                f"Failed to load collection graph {graph_path}: {err}"
            ) from err
        names = get_package_names(loaded_graph)
        collection_packages[collection_name] = names
        logger.debug(
            "Collection '%s': %d packages",
            collection_name,
            len(names),
        )

    results = _analyze_suggestions(toplevel_nodes, collection_packages)

    if output_format != "json":
        _print_suggest_collection_table(results)
        return
    click.echo(json.dumps(results, indent=2))
0 commit comments