Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 12 additions & 26 deletions cyclonedx_py/_internal/utils/cdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from cyclonedx.model.license import DisjunctiveLicense, License, LicenseAcknowledgement, LicenseExpression

from ... import __version__ as _THIS_VERSION # noqa:N812
from .url_classifiers import _MAP_KNOWN_URL_LABELS, _MAP_URL_LABEL_PREFIXES


def make_bom(**kwargs: Any) -> Bom:
Expand Down Expand Up @@ -119,32 +120,17 @@ def licenses_fixup(component: 'Component') -> None:
component.evidence.licenses.update(licenses)


# Lookup table: normalized project-URL label -> external reference type.
# Keys are lowercase and consist of the letters a-z only, which is the form
# produced by the normalization in ``url_label_to_ert``.
_MAP_KNOWN_URL_LABELS: dict[str, ExternalReferenceType] = {
    # see https://peps.python.org/pep-0345/#project-url-multiple-use
    # see https://github.com/pypi/warehouse/issues/5947#issuecomment-699660629
    # -- issue trackers
    'bugtracker': ExternalReferenceType.ISSUE_TRACKER,
    'issuetracker': ExternalReferenceType.ISSUE_TRACKER,
    'issues': ExternalReferenceType.ISSUE_TRACKER,
    'bugreports': ExternalReferenceType.ISSUE_TRACKER,
    'tracker': ExternalReferenceType.ISSUE_TRACKER,
    # -- project website
    'home': ExternalReferenceType.WEBSITE,
    'homepage': ExternalReferenceType.WEBSITE,
    # -- downloads
    'download': ExternalReferenceType.DISTRIBUTION,
    # -- documentation
    'documentation': ExternalReferenceType.DOCUMENTATION,
    'docs': ExternalReferenceType.DOCUMENTATION,
    # -- release notes
    'changelog': ExternalReferenceType.RELEASE_NOTES,
    'changes': ExternalReferenceType.RELEASE_NOTES,
    # -- source code; the 'source' label is deliberately left unmapped here
    # 'source': ExternalReferenceType.SOURCE-DISTRIBUTION,
    'repository': ExternalReferenceType.VCS,
    'github': ExternalReferenceType.VCS,
    # -- communication
    'chat': ExternalReferenceType.CHAT,
}

# Matches every character that is not a lowercase ASCII letter. It is used to
# strip such characters from an already-lowercased URL label before lookup.
_NOCHAR_MATCHER = re_compile('[^a-z]')


def url_label_to_ert(value: str) -> ExternalReferenceType:
    """Translate a project-URL label into an ``ExternalReferenceType``.

    The label is lowercased and every character outside ``a-z`` is dropped
    before it is looked up in ``_MAP_KNOWN_URL_LABELS``.

    :param value: label of the project URL, e.g. ``'Bug Tracker'``.
    :return: the mapped type, or ``ExternalReferenceType.OTHER`` when the
        normalized label is not a known key.
    """
    normalized = _NOCHAR_MATCHER.sub('', str(value).lower())
    return _MAP_KNOWN_URL_LABELS.get(normalized, ExternalReferenceType.OTHER)
def url_label_to_ert(label: str, url: Optional[str] = None) -> ExternalReferenceType:
    """Classify a project-URL label as an ``ExternalReferenceType``.

    The label is lowercased and every character outside ``a-z`` is dropped
    before matching. Matching is attempted in this order:

    1. exact match against the keys of ``_MAP_KNOWN_URL_LABELS``
    2. first entry of ``_MAP_URL_LABEL_PREFIXES`` whose prefix the normalized
       label starts with

    :param label: label of the project URL, e.g. ``'Bug Reports'``.
    :param url: the URL that belongs to the label. It is accepted so callers
        can pass it along, but this implementation does not consult it.
    :return: the matched type, or ``ExternalReferenceType.OTHER`` when neither
        step matches.
    """
    key = _NOCHAR_MATCHER.sub('', str(label).lower())
    exact = _MAP_KNOWN_URL_LABELS.get(key)
    if exact is None:
        # No exact hit: fall back to the ordered prefix table, then to OTHER.
        return next(
            (mapped for head, mapped in _MAP_URL_LABEL_PREFIXES if key.startswith(head)),
            ExternalReferenceType.OTHER,
        )
    return exact
76 changes: 76 additions & 0 deletions cyclonedx_py/_internal/utils/url_classifiers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# This file is part of CycloneDX Python
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) OWASP Foundation. All Rights Reserved.


"""
Pure mapping data for URL -> ExternalReferenceType classification.

This module is DATA ONLY -- no logic. To extend classification, add rows here.
Match styles, applied by ``cdx.url_label_to_ert`` in this precedence order:

1. _MAP_KNOWN_URL_LABELS exact label (normalized: lowercased, non-[a-z] stripped)
2. _MAP_URL_LABEL_PREFIXES label prefix (PyPI '*' semantics); first match wins

Described here, but NOT defined in this module yet and not applied by
``cdx.url_label_to_ert`` (TODO: add the tables and wire them in, or drop these rows):

3. _MAP_KNOWN_URL_HOST_SUFFIXES host == key OR host endswith '.'+key (domain + subdomains)
4. _MAP_KNOWN_URL_HOST_PREFIXES host == key OR host startswith key+'.' (e.g. docs.*)

Label keys MUST already be normalized (lowercase, only [a-z]).
Host keys MUST be lowercase.

see https://docs.pypi.org/project_metadata/#icons
"""

from cyclonedx.model import ExternalReferenceType

# 1. exact label -> ERT
# Keys are compared against the fully normalized label, so they must be
# lowercase and contain only the letters a-z (no spaces, digits, punctuation).
_MAP_KNOWN_URL_LABELS: dict[str, ExternalReferenceType] = {
    # see https://peps.python.org/pep-0345/#project-url-multiple-use
    # see https://github.com/pypi/warehouse/issues/5947#issuecomment-699660629
    # -- issue trackers
    'bugtracker': ExternalReferenceType.ISSUE_TRACKER,
    'issuetracker': ExternalReferenceType.ISSUE_TRACKER,
    'issues': ExternalReferenceType.ISSUE_TRACKER,
    'bugreports': ExternalReferenceType.ISSUE_TRACKER,
    'tracker': ExternalReferenceType.ISSUE_TRACKER,
    # -- project website
    'home': ExternalReferenceType.WEBSITE,
    'homepage': ExternalReferenceType.WEBSITE,
    # -- downloads
    'download': ExternalReferenceType.DISTRIBUTION,
    # -- documentation
    'documentation': ExternalReferenceType.DOCUMENTATION,
    'docs': ExternalReferenceType.DOCUMENTATION,
    # -- release notes / changelogs
    'changelog': ExternalReferenceType.RELEASE_NOTES,
    'changes': ExternalReferenceType.RELEASE_NOTES,
    'releasenotes': ExternalReferenceType.RELEASE_NOTES,
    'news': ExternalReferenceType.RELEASE_NOTES,
    'whatsnew': ExternalReferenceType.RELEASE_NOTES,
    'history': ExternalReferenceType.RELEASE_NOTES,
    # -- source code / version control
    'repository': ExternalReferenceType.VCS,
    'source': ExternalReferenceType.VCS,
    'github': ExternalReferenceType.VCS,
    # -- communication
    'chat': ExternalReferenceType.CHAT,
}

# 2. label prefix -> ERT (ordered; first match wins). normalized prefixes.
# A tuple of pairs rather than a dict, because the rows are tried in order.
# Prefixes are tested with ``str.startswith`` against the normalized label,
# so they must be lowercase and contain only the letters a-z.
_MAP_URL_LABEL_PREFIXES: tuple[tuple[str, ExternalReferenceType], ...] = (
    # -- documentation
    ('documentation', ExternalReferenceType.DOCUMENTATION),
    ('docs', ExternalReferenceType.DOCUMENTATION),
    # -- issue trackers
    ('bug', ExternalReferenceType.ISSUE_TRACKER),
    ('issue', ExternalReferenceType.ISSUE_TRACKER),
    ('tracker', ExternalReferenceType.ISSUE_TRACKER),
    ('report', ExternalReferenceType.ISSUE_TRACKER),
    # -- funding-related labels; mapped to OTHER explicitly
    ('funding', ExternalReferenceType.OTHER),
    ('sponsor', ExternalReferenceType.OTHER),
    ('donation', ExternalReferenceType.OTHER),
    ('donate', ExternalReferenceType.OTHER),
)
41 changes: 40 additions & 1 deletion tests/unit/test_utils_cdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@
from cyclonedx.model import ExternalReference, ExternalReferenceType
from cyclonedx.model.component import Component, ComponentType
from cyclonedx.model.license import License, LicenseAcknowledgement
from ddt import data, ddt, unpack

from cyclonedx_py._internal.utils.cdx import make_bom
from cyclonedx_py._internal.utils.cdx import make_bom, url_label_to_ert
from tests import EXPECTED_TOOL_NAME, load_pyproject


Expand Down Expand Up @@ -79,3 +80,41 @@ def test_extrefs(self) -> None:
c = self.__get_c_by_name(EXPECTED_TOOL_NAME)
ers: tuple[ExternalReference, ...] = tuple(c.external_references)
self.assertExtRefs(p, ers)


@ddt
class TestUrlLabelToErt(TestCase):
    """Checks the label -> ``ExternalReferenceType`` mapping of ``url_label_to_ert``."""

    @data(
        # labels that normalize to a key of the exact-label table
        ('Homepage', ExternalReferenceType.WEBSITE),
        ('Home', ExternalReferenceType.WEBSITE),
        ('Download', ExternalReferenceType.DISTRIBUTION),
        ('Changelog', ExternalReferenceType.RELEASE_NOTES),
        ('Change log', ExternalReferenceType.RELEASE_NOTES),
        ('Release notes', ExternalReferenceType.RELEASE_NOTES),
        ("What's new", ExternalReferenceType.RELEASE_NOTES),
        ('History', ExternalReferenceType.RELEASE_NOTES),
        ('Repository', ExternalReferenceType.VCS),
        ('Source', ExternalReferenceType.VCS),
        ('Chat', ExternalReferenceType.CHAT),
        ('Documentation', ExternalReferenceType.DOCUMENTATION),
        ('Bug Reports', ExternalReferenceType.ISSUE_TRACKER),
        ('Issue Tracker', ExternalReferenceType.ISSUE_TRACKER),
        ('Tracker', ExternalReferenceType.ISSUE_TRACKER),
        # labels that only match via a prefix (PyPI '*' semantics)
        ('Documentation for users', ExternalReferenceType.DOCUMENTATION),
        ('Docs (latest)', ExternalReferenceType.DOCUMENTATION),
        ('Bugs', ExternalReferenceType.ISSUE_TRACKER),
        ('Issue board', ExternalReferenceType.ISSUE_TRACKER),
        ('Tracker (GitHub)', ExternalReferenceType.ISSUE_TRACKER),
        ('Report a bug', ExternalReferenceType.ISSUE_TRACKER),
        ('Funding', ExternalReferenceType.OTHER),
        ('Sponsor this project', ExternalReferenceType.OTHER),
        ('Donate', ExternalReferenceType.OTHER),
        # unknown -> OTHER
        ('Some Random Label', ExternalReferenceType.OTHER),
        # labels that normalize to the empty string -> OTHER
        ('', ExternalReferenceType.OTHER),
        ('42', ExternalReferenceType.OTHER),
    )
    @unpack
    def test_label_only(self, label: str, expected: ExternalReferenceType) -> None:
        # A label passed without a URL must map to the expected type.
        self.assertIs(expected, url_label_to_ert(label))

    def test_label_only_url_none_backcompat(self) -> None:
        # Passing ``url=None`` explicitly must behave like omitting the URL.
        self.assertIs(ExternalReferenceType.WEBSITE, url_label_to_ert('Homepage', None))