Skip to content

Commit 19a1434

Browse files
authored
Merge pull request #607 from LalatenduMohanty/issue_561
Replacing the metadata parser with packaging.metadata
2 parents b5b6f7f + c72c58d commit 19a1434

6 files changed

Lines changed: 286 additions & 46 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ dependencies = [
3636
"elfdeps>=0.2.0",
3737
"license-expression",
3838
"packaging",
39-
"pkginfo",
4039
"psutil",
4140
"pydantic",
4241
"pypi_simple",
@@ -204,7 +203,7 @@ exclude = [
204203

205204
[[tool.mypy.overrides]]
206205
# packages without typing annotations and stubs
207-
module = ["license_expression", "pyproject_hooks", "requests_mock", "resolver", "stevedore"]
206+
module = ["hatchling", "hatchling.build", "license_expression", "pyproject_hooks", "requests_mock", "resolver", "stevedore"]
208207
ignore_missing_imports = true
209208

210209
[tool.basedpyright]

src/fromager/bootstrapper.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
import tempfile
1313
import typing
1414
import zipfile
15-
from email.parser import BytesParser
1615
from urllib.parse import urlparse
1716

1817
from packaging.requirements import Requirement
@@ -1234,10 +1233,10 @@ def _get_version_from_package_metadata(
12341233
config_settings=pbi.config_settings,
12351234
)
12361235
metadata_filename = source_dir.parent / metadata_dir_base / "METADATA"
1237-
with open(metadata_filename, "rb") as f:
1238-
p = BytesParser()
1239-
metadata = p.parse(f, headersonly=True)
1240-
return Version(metadata["Version"])
1236+
# Disable validation because some packages have metadata version mismatches
1237+
# (e.g., declaring Metadata-Version: 2.2 but using fields from 2.4).
1238+
metadata = dependencies.parse_metadata(metadata_filename, validate=False)
1239+
return metadata.version
12411240

12421241
def _create_unpack_dir(
12431242
self, req: Requirement, resolved_version: Version

src/fromager/candidate.py

Lines changed: 20 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,10 @@
22
import datetime
33
import logging
44
import typing
5-
from email.message import EmailMessage, Message
6-
from email.parser import BytesParser
75
from io import BytesIO
8-
from typing import TYPE_CHECKING
96
from zipfile import ZipFile
107

8+
from packaging.metadata import Metadata
119
from packaging.requirements import Requirement
1210
from packaging.utils import BuildTag, canonicalize_name
1311
from packaging.version import Version
@@ -16,13 +14,6 @@
1614

1715
logger = logging.getLogger(__name__)
1816

19-
# fix for runtime errors caused by inheriting classes that are generic in stubs but not runtime
20-
# https://mypy.readthedocs.io/en/latest/runtime_troubles.html#using-classes-that-are-generic-in-stubs-but-not-at-runtime
21-
if TYPE_CHECKING:
22-
Metadata = Message[str, str]
23-
else:
24-
Metadata = Message
25-
2617

2718
@dataclasses.dataclass(frozen=True, order=True, slots=True, repr=False, kw_only=True)
2819
class Candidate:
@@ -73,11 +64,10 @@ def metadata(self) -> Metadata:
7364
return self._metadata
7465

7566
def _get_dependencies(self) -> typing.Iterable[Requirement]:
76-
deps = self.metadata.get_all("Requires-Dist", [])
67+
deps = self.metadata.requires_dist or []
7768
extras = self.extras if self.extras else [""]
7869

79-
for d in deps:
80-
r = Requirement(d)
70+
for r in deps:
8171
if r.marker is None:
8272
yield r
8373
else:
@@ -95,19 +85,22 @@ def dependencies(self) -> list[Requirement]:
9585

9686
@property
9787
def requires_python(self) -> str | None:
98-
return self.metadata.get("Requires-Python")
88+
spec = self.metadata.requires_python
89+
return str(spec) if spec is not None else None
9990

10091

101-
def get_metadata_for_wheel(url: str, metadata_url: str | None = None) -> Metadata:
102-
"""
103-
Get metadata for a wheel, supporting PEP 658 metadata endpoints.
92+
def get_metadata_for_wheel(
93+
url: str, metadata_url: str | None = None, *, validate: bool = True
94+
) -> Metadata:
95+
"""Get metadata for a wheel, supporting PEP 658 metadata endpoints.
10496
10597
Args:
10698
url: URL of the wheel file
10799
metadata_url: Optional URL of the metadata file (PEP 658)
100+
validate: Whether to validate metadata (default: True)
108101
109102
Returns:
110-
Parsed metadata as a Message object
103+
Parsed metadata as a Metadata object
111104
"""
112105
# Try PEP 658 metadata endpoint first if available
113106
if metadata_url:
@@ -118,9 +111,9 @@ def get_metadata_for_wheel(url: str, metadata_url: str | None = None) -> Metadat
118111
response = session.get(metadata_url)
119112
response.raise_for_status()
120113

121-
# Parse metadata directly from the response content
122-
p = BytesParser()
123-
metadata = p.parse(BytesIO(response.content), headersonly=True)
114+
# Parse metadata directly using packaging.metadata.Metadata
115+
# (avoiding circular import with dependencies module)
116+
metadata = Metadata.from_email(response.content, validate=validate)
124117
logger.debug(f"Successfully retrieved metadata via PEP 658 for {url}")
125118
return metadata
126119

@@ -136,8 +129,10 @@ def get_metadata_for_wheel(url: str, metadata_url: str | None = None) -> Metadat
136129
with ZipFile(BytesIO(data)) as z:
137130
for n in z.namelist():
138131
if n.endswith(".dist-info/METADATA"):
139-
p = BytesParser()
140-
return p.parse(z.open(n), headersonly=True)
132+
metadata_content = z.read(n)
133+
# Parse metadata directly using packaging.metadata.Metadata
134+
# (avoiding circular import with dependencies module)
135+
return Metadata.from_email(metadata_content, validate=validate)
141136

142-
# If we didn't find the metadata, return an empty dict
143-
return EmailMessage()
137+
# If we didn't find the metadata, raise an error
138+
raise ValueError(f"Could not find METADATA file in wheel: {url}")

src/fromager/dependencies.py

Lines changed: 68 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66
import pathlib
77
import tempfile
88
import typing
9+
import zipfile
910

10-
import pkginfo
1111
import pyproject_hooks
1212
import tomlkit
1313
from packaging.metadata import Metadata
1414
from packaging.requirements import Requirement
15-
from packaging.utils import NormalizedName, canonicalize_name
15+
from packaging.utils import NormalizedName, canonicalize_name, parse_wheel_filename
1616
from packaging.version import Version
1717

1818
from . import (
@@ -344,14 +344,23 @@ def default_get_install_dependencies_of_sdist(
344344
return set(metadata.requires_dist)
345345

346346

347-
def parse_metadata(metadata_file: pathlib.Path, *, validate: bool = True) -> Metadata:
348-
"""Parse a dist-info/METADATA file
347+
def parse_metadata(
348+
metadata_source: pathlib.Path | bytes, *, validate: bool = True
349+
) -> Metadata:
350+
"""Parse metadata from a file path or bytes.
351+
352+
Args:
353+
metadata_source: Path to METADATA file or bytes containing metadata
354+
validate: Whether to validate metadata (default: True)
349355
350-
The default parse mode is 'strict'. It even fails for a mismatch of field
351-
and core metadata version, e.g. a package with metadata 2.2 and
352-
license-expression field (added in 2.4).
356+
Returns:
357+
Parsed Metadata object
353358
"""
354-
return Metadata.from_email(metadata_file.read_bytes(), validate=validate)
359+
if isinstance(metadata_source, pathlib.Path):
360+
metadata_bytes = metadata_source.read_bytes()
361+
else:
362+
metadata_bytes = metadata_source
363+
return Metadata.from_email(metadata_bytes, validate=validate)
355364

356365

357366
def pep517_metadata_of_sdist(
@@ -418,16 +427,65 @@ def validate_dist_name_version(
418427
def get_install_dependencies_of_wheel(
419428
req: Requirement, wheel_filename: pathlib.Path, requirements_file_dir: pathlib.Path
420429
) -> set[Requirement]:
430+
"""Get install dependencies from a wheel file.
431+
432+
Extracts and parses the METADATA file from the wheel to get the
433+
Requires-Dist entries.
434+
435+
Args:
436+
req: The requirement being processed
437+
wheel_filename: Path to the wheel file
438+
requirements_file_dir: Directory to write the requirements file
439+
440+
Returns:
441+
Set of requirements from the wheel's metadata
442+
"""
421443
logger.info(f"getting installation dependencies from {wheel_filename}")
422-
wheel = pkginfo.Wheel(str(wheel_filename))
423-
deps = _filter_requirements(req, wheel.requires_dist)
444+
# Disable validation because many third-party packages have metadata version
445+
# mismatches (e.g., setuptools declares Metadata-Version: 2.2 but uses
446+
# license-file which was introduced in 2.4). The old pkginfo library
447+
# didn't validate this, so we maintain backward compatibility.
448+
metadata = _get_metadata_from_wheel(wheel_filename, validate=False)
449+
requires_dist = metadata.requires_dist or []
450+
deps = _filter_requirements(req, requires_dist)
424451
_write_requirements_file(
425452
deps,
426453
requirements_file_dir / INSTALL_REQ_FILE_NAME,
427454
)
428455
return deps
429456

430457

458+
def _get_metadata_from_wheel(
459+
wheel_filename: pathlib.Path, *, validate: bool = True
460+
) -> Metadata:
461+
"""Extract and parse METADATA from a wheel file.
462+
463+
Args:
464+
wheel_filename: Path to the wheel file
465+
validate: Whether to validate metadata (default: True)
466+
467+
Returns:
468+
Parsed Metadata object
469+
470+
Raises:
471+
ValueError: If no METADATA file is found in the wheel
472+
"""
473+
# Get dist-info path from wheel filename.
474+
# Uses same pattern as wheels.extract_info_from_wheel_file:
475+
_, dist_version, _, _ = parse_wheel_filename(wheel_filename.name)
476+
dist_name = wheel_filename.name.split("-", 1)[0]
477+
metadata_path = f"{dist_name}-{dist_version}.dist-info/METADATA"
478+
479+
with zipfile.ZipFile(wheel_filename) as whl:
480+
try:
481+
metadata_content = whl.read(metadata_path)
482+
except KeyError:
483+
raise ValueError(
484+
f"Could not find METADATA file in wheel: {wheel_filename}"
485+
) from None
486+
return parse_metadata(metadata_content, validate=validate)
487+
488+
431489
def get_pyproject_contents(sdist_root_dir: pathlib.Path) -> dict[str, typing.Any]:
432490
pyproject_toml_filename = sdist_root_dir / "pyproject.toml"
433491
if not os.path.exists(pyproject_toml_filename):

0 commit comments

Comments
 (0)