Skip to content

Commit e907036

Browse files
committed
feat: license and project url metadata helpers
Add helpers to deal with project URLs and license information in package metadata. `normalize_project_urls` and `project_urls_from_metadata` perform PEP 753 URL normalization. `license_from_metadata` and `license_from_metadata_values` deal with the new license expression string as well as the legacy `license` field and trove classifiers. The functions map unambiguous strings and classifiers to SPDX license expressions. Signed-off-by: Christian Heimes <cheimes@redhat.com>
1 parent 81d7024 commit e907036

5 files changed

Lines changed: 452 additions & 1 deletion

File tree

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ requires-python = ">=3.11"
3333
dependencies = [
3434
"click>=8.1.7",
3535
"elfdeps>=0.2.0",
36+
"license-expression",
3637
"packaging",
3738
"pkginfo",
3839
"psutil",
@@ -204,7 +205,7 @@ exclude = [
204205

205206
[[tool.mypy.overrides]]
206207
# packages without typing annotations and stubs
207-
module = ["pyproject_hooks", "requests_mock", "resolver", "stevedore"]
208+
module = ["license_expression", "pyproject_hooks", "requests_mock", "resolver", "stevedore"]
208209
ignore_missing_imports = true
209210

210211
[tool.basedpyright]
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
"""Package metadata helpers
2+
3+
- PEP 639-based license detection
4+
- PEP 753 project URL normalization
5+
"""
6+
7+
from .pep639 import license_from_metadata, license_from_metadata_values
8+
from .pep753 import normalize_project_urls, project_urls_from_metadata
9+
10+
# Public API of the package: the helpers re-exported from the ``pep639`` and
# ``pep753`` submodules.
__all__ = (
    "license_from_metadata",
    "license_from_metadata_values",
    "normalize_project_urls",
    "project_urls_from_metadata",
)

src/fromager/pkgmetadata/pep639.py

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
"""Map common license strings to SPDX
2+
3+
- https://peps.python.org/pep-0639/
4+
- https://github.com/pypa/trove-classifiers
5+
- https://github.com/pypa/trove-classifiers/issues/17#issuecomment-385027197
6+
- https://spdx.org/licenses/
7+
"""
8+
9+
from license_expression import LicenseExpression, get_spdx_licensing
10+
from packaging.metadata import Metadata
11+
12+
13+
def license_from_metadata(metadata: Metadata) -> LicenseExpression:
    """Return the license expression described by *metadata*

    Reads the ``license_expression``, ``license``, and ``classifiers``
    fields of the metadata object and hands them to
    ``license_from_metadata_values``, which performs the actual detection.
    """
    expression = metadata.license_expression
    text = metadata.license
    troves = metadata.classifiers
    return license_from_metadata_values(
        license_expression=expression,
        license_text=text,
        classifiers=troves,
    )
20+
21+
22+
def license_from_metadata_values(
    *,
    license_expression: str | None = None,
    license_text: str | None = None,
    classifiers: list[str] | None = None,
) -> LicenseExpression:
    """Work out a license expression from raw metadata values

    The sources are consulted in this order:

    1. *license_expression* wins when it is set. It is parsed as an SPDX
       license expression and parse errors propagate to the caller.
    2. *license_text* is tried next. Some unambiguous translations are
       applied (e.g. ``Apache 2`` to ``Apache-2.0``) before the string is
       parsed as an SPDX license expression.
    3. Trove *classifiers* are the last resort.

    Raises ``ValueError`` when all three values are empty. Raises an
    ``ExceptionGroup`` holding the collected ``ValueError`` instances when
    neither the license text nor the classifiers yield a license.
    """
    if not (license_expression or license_text or classifiers):
        raise ValueError("license expression, text, and classifiers are empty")

    if license_expression:
        return _parse_spdx(license_expression)

    # Each fallback that fails is remembered, so the final error reports
    # every reason the detection did not succeed.
    failures: list[Exception] = []

    if license_text:
        try:
            return _license_text_to_spdx(license_text)
        except ValueError as exc:
            failures.append(exc)

    if classifiers:
        try:
            return _trove_to_spdx(classifiers)
        except ValueError as exc:
            failures.append(exc)

    raise ExceptionGroup("unable to detect license", failures)
59+
60+
61+
# Module-wide SPDX licensing object; ``_parse_spdx`` uses it to parse license
# expressions.
_SPDX = get_spdx_licensing()
62+
63+
64+
def _parse_spdx(text: str, *, simplify: bool = False) -> LicenseExpression:
    """Parse, validate, and optionally simplify a SPDX license expression

    *text* is the license expression string. When *simplify* is true the
    parsed expression is simplified before it is returned.

    Raises ``ValueError`` if *text* does not contain an expression. Errors
    raised by the SPDX parser for malformed or unknown expressions
    propagate unchanged.
    """
    # LicenseRef are references to non-SPDX licenses
    validate = not text.startswith("LicenseRef-")
    expr = _SPDX.parse(text, validate=validate)
    if expr is None:
        # The parser returns None for an empty expression. Fail loudly
        # instead of handing None to callers that expect an expression.
        raise ValueError("license expression is empty")
    if simplify:
        expr = expr.simplify()
    return expr
72+
73+
74+
def _trove_to_spdx(troves: list[str]) -> LicenseExpression:
    """Convert unambiguous trove classifiers to SPDX

    Classifiers that are not listed in ``_TROVE_SPDX`` are ignored. The
    SPDX identifiers of all remaining classifiers are joined with ``AND``
    and the resulting expression is simplified.

    Raises ``ValueError`` if a classifier is listed as ambiguous or if
    none of the classifiers maps to a license.
    """
    spdx_ids: list[str] = []
    for trove in troves:
        if trove not in _TROVE_SPDX:
            # not a (known) license classifier
            continue
        mapped = _TROVE_SPDX[trove]
        if mapped is None:
            raise ValueError(f"{trove!r} is ambiguous")
        spdx_ids.append(mapped)
    if not spdx_ids:
        # Without this guard an empty string would be parsed and simplified,
        # which fails with an AttributeError instead of a ValueError.
        raise ValueError("no license classifiers found")
    # join with AND
    return _parse_spdx(" AND ".join(spdx_ids), simplify=True)
86+
87+
88+
def _license_text_to_spdx(text: str) -> LicenseExpression:
    """Convert unambiguous strings to SPDX

    Surrounding whitespace is removed from *text*. Known spellings are
    translated with ``_LICENSE_STRING_TO_SPDX``; any other text is parsed
    as a SPDX license expression as-is.

    Raises ``ValueError`` if the text is empty or cannot be parsed. The
    message of a parse failure is the (truncated) license text and the
    parser error is attached as ``__cause__``.
    """
    text = text.strip()
    if not text:
        # An empty expression has no license; do not treat it as a match.
        raise ValueError("license text is empty")
    text = _LICENSE_STRING_TO_SPDX.get(text, text)
    try:
        return _parse_spdx(text)
    except Exception as err:
        # The license field may contain a complete license text, so only
        # the first 100 characters are reported.
        raise ValueError(text[:100]) from err
97+
98+
99+
# Unambiguous license text to SPDX license expression.
# The keys are common cases seen in the wild on PyPI.org. They are looked up
# verbatim (after surrounding whitespace has been stripped), so each spelling
# and capitalization variant needs its own entry.
_LICENSE_STRING_TO_SPDX: dict[str, str] = {
    "http://opensource.org/licenses/MIT": "MIT",
    "MIT License": "MIT",
    "MIT license": "MIT",
    "Apache 2.0": "Apache-2.0",
    "Apache 2": "Apache-2.0",
    "Apache License, Version 2.0": "Apache-2.0",
    "Apache Software License 2.0": "Apache-2.0",
    "Apache License 2.0": "Apache-2.0",
    "Apache License Version 2.0": "Apache-2.0",
    "GPLv3+": "GPL-3.0-or-later",
    "BSD 3-Clause License": "BSD-3-Clause",
    "BSD-3-Clause License": "BSD-3-Clause",
    "3-clause BSD": "BSD-3-Clause",
    "3-clause BSD License": "BSD-3-Clause",
    "ISC License": "ISC",
    "ISC license": "ISC",
    # non-SPDX license, referenced with a LicenseRef identifier
    "NVIDIA Proprietary Software": "LicenseRef-NVIDIA-SOFTWARE-LICENSE",
}
120+
121+
122+
# PyPA trove to SPDX
# Several trove classifiers can be mapped to an SPDX license expression. Some
# classifiers are ambiguous, e.g. 'BSD' or 'GPL'. The classifiers do not
# include license versions and extra clauses.
#
# A value of ``None`` marks a classifier that cannot be mapped to a single
# SPDX identifier.
_TROVE_SPDX: dict[str, str | None] = {
    "License :: Aladdin Free Public License (AFPL)": "Aladdin",
    "License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication": "CC0-1.0",
    "License :: CeCILL-B Free Software License Agreement (CECILL-B)": "CECILL-B",
    "License :: CeCILL-C Free Software License Agreement (CECILL-C)": "CECILL-C",
    # not a license
    # "License :: DFSG approved": None,
    # multiple versions: EFL-1.0, EFL-2.0
    "License :: Eiffel Forum License (EFL)": None,
    "License :: Free For Educational Use": None,
    "License :: Free For Home Use": None,
    "License :: Free To Use But Restricted": None,
    "License :: Free for non-commercial use": None,
    "License :: Freely Distributable": None,
    "License :: Freeware": None,
    "License :: GUST Font License 1.0": None,
    "License :: GUST Font License 2006-09-30": None,
    # multiple versions: NPL-1.0, NPL-1.1
    "License :: Netscape Public License (NPL)": None,
    "License :: Nokia Open Source License (NOKOS)": "Nokia",
    # not a license
    # "License :: OSI Approved": None,
    # multiple versions: AFL-1.1, AFL-1.2, AFL-2.0, AFL-2.1, AFL-3.0
    "License :: OSI Approved :: Academic Free License (AFL)": None,
    # multiple versions: Apache-1.0, Apache-1.1, Apache-2.0
    "License :: OSI Approved :: Apache Software License": None,
    # multiple versions: APSL-1.0, APSL-1.1, APSL-1.2, APSL-2.0
    "License :: OSI Approved :: Apple Public Source License": None,
    # multiple versions: Artistic-1.0, Artistic-2.0
    "License :: OSI Approved :: Artistic License": None,
    "License :: OSI Approved :: Attribution Assurance License": "AAL",
    # multiple versions and extra clauses
    "License :: OSI Approved :: BSD License": None,
    "License :: OSI Approved :: Blue Oak Model License (BlueOak-1.0.0)": "BlueOak-1.0.0",
    "License :: OSI Approved :: Boost Software License 1.0 (BSL-1.0)": "BSL-1.0",
    "License :: OSI Approved :: CEA CNRS Inria Logiciel Libre License, version 2.1 (CeCILL-2.1)": "CeCILL-2.1",
    "License :: OSI Approved :: CMU License (MIT-CMU)": "MIT-CMU",
    "License :: OSI Approved :: Common Development and Distribution License 1.0 (CDDL-1.0)": "CDDL-1.0",
    "License :: OSI Approved :: Common Public License": "CPL-1.0",
    "License :: OSI Approved :: Eclipse Public License 1.0 (EPL-1.0)": "EPL-1.0",
    "License :: OSI Approved :: Eclipse Public License 2.0 (EPL-2.0)": "EPL-2.0",
    "License :: OSI Approved :: Educational Community License, Version 2.0 (ECL-2.0)": "ECL-2.0",
    # multiple versions
    "License :: OSI Approved :: Eiffel Forum License": None,
    "License :: OSI Approved :: European Union Public Licence 1.0 (EUPL 1.0)": "EUPL-1.0",
    "License :: OSI Approved :: European Union Public Licence 1.1 (EUPL 1.1)": "EUPL-1.1",
    "License :: OSI Approved :: European Union Public Licence 1.2 (EUPL 1.2)": "EUPL-1.2",
    # NOTE(review): the plain GPLv2/GPLv3/LGPLv3 classifiers below are treated
    # as ambiguous; confirm that "-only" is intended for plain AGPLv3.
    "License :: OSI Approved :: GNU Affero General Public License v3": "AGPL-3.0-only",
    "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)": "AGPL-3.0-or-later",
    # multiple versions
    "License :: OSI Approved :: GNU Free Documentation License (FDL)": None,
    # multiple versions
    "License :: OSI Approved :: GNU General Public License (GPL)": None,
    # ambiguous, see PEP 639
    "License :: OSI Approved :: GNU General Public License v2 (GPLv2)": None,
    "License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)": "GPL-2.0-or-later",
    # ambiguous, see PEP 639
    "License :: OSI Approved :: GNU General Public License v3 (GPLv3)": None,
    "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)": "GPL-3.0-or-later",
    # ambiguous, see PEP 639
    "License :: OSI Approved :: GNU Lesser General Public License v2 (LGPLv2)": None,
    # ambiguous, see PEP 639
    "License :: OSI Approved :: GNU Lesser General Public License v2 or later (LGPLv2+)": None,
    # ambiguous, see PEP 639
    "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)": None,
    "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)": "LGPL-3.0-or-later",
    # ambiguous, see PEP 639
    "License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)": None,
    # multiple versions
    "License :: OSI Approved :: Historical Permission Notice and Disclaimer (HPND)": None,
    "License :: OSI Approved :: IBM Public License": "IPL-1.0",
    "License :: OSI Approved :: ISC License (ISCL)": "ISC",
    "License :: OSI Approved :: MIT License": "MIT",
    "License :: OSI Approved :: MIT No Attribution License (MIT-0)": "MIT-0",
    "License :: OSI Approved :: MirOS License (MirOS)": "MirOS",
    "License :: OSI Approved :: Motosoto License": "Motosoto",
    "License :: OSI Approved :: Mozilla Public License 1.0 (MPL)": "MPL-1.0",
    "License :: OSI Approved :: Mozilla Public License 1.1 (MPL 1.1)": "MPL-1.1",
    "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)": "MPL-2.0",
    "License :: OSI Approved :: Mulan Permissive Software License v2 (MulanPSL-2.0)": "MulanPSL-2.0",
    "License :: OSI Approved :: NASA Open Source Agreement v1.3 (NASA-1.3)": "NASA-1.3",
    "License :: OSI Approved :: Nethack General Public License": "NGPL",
    "License :: OSI Approved :: Nokia Open Source License": "Nokia",
    "License :: OSI Approved :: Open Group Test Suite License": "OGTSL",
    "License :: OSI Approved :: Open Software License 3.0 (OSL-3.0)": "OSL-3.0",
    "License :: OSI Approved :: PostgreSQL License": "PostgreSQL",
    "License :: OSI Approved :: Python License (CNRI Python License)": "CNRI-Python",
    "License :: OSI Approved :: Python Software Foundation License": "PSF-2.0",
    "License :: OSI Approved :: Qt Public License (QPL)": "QPL-1.0",
    "License :: OSI Approved :: Ricoh Source Code Public License": "RSCPL",
    "License :: OSI Approved :: SIL Open Font License 1.1 (OFL-1.1)": "OFL-1.1",
    "License :: OSI Approved :: Sleepycat License": "Sleepycat",
    "License :: OSI Approved :: Sun Public License": "SPL-1.0",
    "License :: OSI Approved :: The Unlicense (Unlicense)": "Unlicense",
    "License :: OSI Approved :: Universal Permissive License (UPL)": "UPL-1.0",
    "License :: OSI Approved :: University of Illinois/NCSA Open Source License": "NCSA",
    "License :: OSI Approved :: Vovida Software License 1.0": "VSL-1.0",
    "License :: OSI Approved :: W3C License": "W3C",
    "License :: OSI Approved :: Zero-Clause BSD (0BSD)": "0BSD",
    # multiple versions: ZPL-1.1, ZPL-2.0, ZPL-2.1
    "License :: OSI Approved :: Zope Public License": None,
    # NOTE(review): SPDX also defines a plain ``Zlib`` identifier; confirm that
    # ``zlib-acknowledgement`` is the intended match for this classifier.
    "License :: OSI Approved :: zlib/libpng License": "zlib-acknowledgement",
    "License :: Other/Proprietary License": None,
    "License :: Public Domain": None,
    "License :: Repoze Public License": None,
}

src/fromager/pkgmetadata/pep753.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
"""Normalize PEP 753 project URLs
2+
3+
https://peps.python.org/pep-0753/
4+
"""
5+
6+
import string
7+
import typing
8+
9+
from packaging.metadata import Metadata
10+
11+
# Lookup table from a normalized label (lower case, without punctuation and
# whitespace) to the well-known label it stands for. The entries are grouped
# by their well-known label; each group starts with the label itself.
_PROJECT_URL_MAP: dict[str, str] = {
    # homepage
    "homepage": "homepage",
    # source
    "source": "source",
    "repository": "source",
    "sourcecode": "source",
    "github": "source",
    # download
    "download": "download",
    # changelog
    "changelog": "changelog",
    "changes": "changelog",
    "whatsnew": "changelog",
    "history": "changelog",
    # releasenotes
    "releasenotes": "releasenotes",
    # documentation
    "documentation": "documentation",
    "docs": "documentation",
    # issues
    "issues": "issues",
    "bugs": "issues",
    "issue": "issues",
    "tracker": "issues",
    "issuetracker": "issues",
    "bugtracker": "issues",
    # funding
    "funding": "funding",
    "sponsor": "funding",
    "donate": "funding",
    "donation": "funding",
}
44+
45+
# Translation table for ``str.translate`` that deletes every character in
# ``string.punctuation`` and ``string.whitespace``.
_REMOVAL_MAP = str.maketrans("", "", string.punctuation + string.whitespace)
46+
47+
48+
def normalize_pep753_label(label: str) -> str:
    """Map *label* to its well-known name

    The label is stripped, punctuation and whitespace characters are
    removed, and the result is lower-cased before it is looked up in the
    table of well-known labels. A label without a well-known equivalent is
    returned unchanged.
    """
    # https://peps.python.org/pep-0753/#label-normalization
    lookup_key = label.strip().translate(_REMOVAL_MAP).lower()
    return _PROJECT_URL_MAP.get(lookup_key, label)
56+
57+
58+
def normalize_project_urls(
    project_urls: typing.Iterable[tuple[str, str]],
) -> list[tuple[str, str]]:
    """Return the sorted, de-duplicated project URLs with normalized labels

    Every label is passed through ``normalize_pep753_label``. Identical
    label/URL pairs are collapsed into one entry. The same label may still
    occur more than once with different URLs, e.g. two ``homepage`` entries.
    """
    unique_entries = {
        (normalize_pep753_label(name), url) for name, url in project_urls
    }
    return sorted(unique_entries)
67+
68+
69+
def project_urls_from_metadata(metadata: Metadata) -> list[tuple[str, str]]:
    """Collect and normalize the project URLs found in package metadata

    Entries of ``project_urls`` come first. ``home_page`` and
    ``download_url`` are added under the labels ``homepage`` and
    ``download`` when they are set. The combined list is passed through
    ``normalize_project_urls``.
    """
    project_urls = metadata.project_urls
    collected: list[tuple[str, str]] = (
        list(project_urls.items()) if project_urls else []
    )

    home_page = metadata.home_page
    if home_page is not None:
        collected.append(("homepage", home_page))

    download_url = metadata.download_url
    if download_url is not None:
        collected.append(("download", download_url))

    return normalize_project_urls(collected)

0 commit comments

Comments
 (0)