From 02767a9a9b5151bdf1972aeae6ee7f17a21b5dda Mon Sep 17 00:00:00 2001 From: Hashem Nasarat Date: Wed, 27 May 2026 20:45:26 -0400 Subject: [PATCH] Synthesize __getattr__ on untyped parents of typed subpackages Fixes #16149. When mypy follows a third-party `import pkg.typed` and `pkg` itself has no `py.typed`, `pkg` is loaded as an empty namespace package -- its `__init__.py` is never read. Subsequent `pkg.name` access from any other file then raises `attr-defined`, even when `__init__.py` re-exports `name` at runtime. The motivating case: numba ships `numba/typed/py.typed` but not `numba/py.typed`. Any source that does `import numba.typed` makes every `@numba.jit(...)` site elsewhere fail with Module has no attribute "jit" The existing workarounds (`follow_imports = "skip"` or per-module `follow_untyped_imports = True`) both require the user to know the shape of every dependency they pull in. Call chain for an example repro of the bug: 1. `foo.py` does `import numba.typed`. 2. `FindModuleCache._find_module("numba.typed")` calls `_find_module_non_stub_helper`, which walks the components and finds `numba/typed/py.typed` at iteration 1 -- so the helper returns the parent path `(pkg_dir/numba, False)` (the typed sub is reachable). 3. Back in `_find_module`, line 496 calls `_update_ns_ancestors(["numba", "typed"], (pkg_dir/numba, False))`. The loop's first iteration sets `ns_ancestors["numba"] = pkg_dir/numba` -- even though `numba/__init__.py` exists, i.e. `numba` is a regular package, not a namespace package. 4. Loading `numba.typed` requires its parent `numba` as an ancestor (`State.add_ancestors`). mypy calls `find_module("numba")`. 5. `_find_module_non_stub_helper("numba", pkg_dir)` returns `FOUND_WITHOUT_TYPE_HINTS` (no `numba/py.typed`). That should be the final answer -- but at line 595 the fallback `ancestor = self.ns_ancestors.get("numba")` hits the entry written in step 3 and returns the directory path instead. `numba` is now "found" with a real path. 6. 
mypy parses `numba/__init__.py` (silenced, because it was found by following imports into site-packages). The line `from numba.core import jit` triggers `find_module("numba.core")` -> `FOUND_WITHOUT_TYPE_HINTS`. Because the parent is being processed under silenced follow-imports, the resulting `ModuleNotFound` doesn't surface and `numba.core` is never analyzed; `jit` never enters `numba`'s symbol table. 7. `main.py` does `import numba`. Same find result as step 5 (cached). The symbol table from step 6 is reused -- it lacks `jit`. The access `numba.jit` raises `attr-defined`. My initial fix (not the one in this commit) had been to avoid adding a package to ns_ancestors in step 3 above, but that has the downside that usages of `import numba.typed` become `Any`, compared with the `__getattr__` fix. Instead, this more robust fix injects a module-level `__getattr__: (str) -> Any` into the parent's symbol table, which allows the typed submodule to stay typed. This synthesized annotation is added when: 1. the State isn't a stub, has no definitions, and has no existing `__getattr__` 2. the module was previously only found as a parent module of a `py.typed` subpackage 3. self.path is a directory (treated as a namespace package), but there's actually an `__init__.py[i]` file there. The synthetic annotation is added with `module_public=False` and `module_hidden=True` so direct user accesses like `pkg.__getattr__` and `from pkg import __getattr__` fall through to `types.ModuleType.__getattr__` from typeshed -- the same answer a plain untyped module would give. mypy's internal __getattr__-fallback path reads `tree.names["__getattr__"]` directly and bypasses both flags, so the synthetic is still consulted for `pkg.X` lookups. `lookup_module_name` in semanal keeps its existing priority order: a real submodule already loaded into `self.modules` wins first, and only unresolved attributes fall back to `__getattr__`. 
So: - `numba.jit` -- resolves through `__getattr__` to `Any` instead of raising `attr-defined`. - `numba.typed` resolves to the typed submodule. - Direct binding forms (`from numba.typed import X`, `from numba import typed`, `import numba.typed as ts`) remain typed. The helper needs `find_module_cache.ns_ancestors` and two cached FS reads via `fscache`, both owned by `BuildManager`, so doing the work in `State` reuses `self.manager` natively. Since `semantic_analysis_pass1` runs once per module and the helper uses a number of early exits, the overhead is small: when analyzing the State per-file --timing-stats, the change is very minimal (20us). (Claude 4.7 was used for brainstorming different iterations of this fix.) --- mypy/build.py | 34 ++++++++++- mypy/modulefinder.py | 3 + .../untypedpkg_w_typed_sub/pyproject.toml | 11 ++++ .../untypedpkg_w_typed_sub/__init__.py | 1 + .../untypedpkg_w_typed_sub/sibling.py | 1 + .../typed_sub/__init__.py | 1 + .../untypedpkg_w_typed_sub/typed_sub/py.typed | 0 test-data/unit/pep561.test | 60 +++++++++++++++++++ 8 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 test-data/packages/untypedpkg_w_typed_sub/pyproject.toml create mode 100644 test-data/packages/untypedpkg_w_typed_sub/untypedpkg_w_typed_sub/__init__.py create mode 100644 test-data/packages/untypedpkg_w_typed_sub/untypedpkg_w_typed_sub/sibling.py create mode 100644 test-data/packages/untypedpkg_w_typed_sub/untypedpkg_w_typed_sub/typed_sub/__init__.py create mode 100644 test-data/packages/untypedpkg_w_typed_sub/untypedpkg_w_typed_sub/typed_sub/py.typed diff --git a/mypy/build.py b/mypy/build.py index 09e739f7fb991..6d5fe6454938f 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -108,6 +108,7 @@ ) from mypy.messages import MessageBuilder from mypy.nodes import ( + GDEF, Decorator, FileRawData, FuncDef, @@ -118,6 +119,8 @@ MypyFile, OverloadedFuncDef, SymbolTable, + SymbolTableNode, + Var, ) from mypy.options import OPTIONS_AFFECTING_CACHE_NO_PLATFORM from mypy.partially_defined import 
PossiblyUndefinedVariableVisitor @@ -169,7 +172,7 @@ from mypy.renaming import LimitedVariableRenameVisitor, VariableRenameVisitor from mypy.stats import dump_type_stats from mypy.stubinfo import stub_distribution_name -from mypy.types import Type, instance_cache +from mypy.types import AnyType, Type, TypeOfAny, instance_cache from mypy.typestate import reset_global_state, type_state from mypy.util import json_dumps, json_loads from mypy.version import __version__ @@ -3185,6 +3188,34 @@ def parse_file_inner(self, source: str | None, raw_data: FileRawData | None = No ) self.time_spent_us += time_spent_us(t0) + def _maybe_inject_synthetic_getattr_for_typed_subpackage(self) -> None: + """Give an untyped parent of a typed subpackage a fallback __getattr__. + + Without this, attribute access on the parent raises ``attr-defined`` + for any name that's defined in the parent's ``__init__.py`` -- because + that file is never read (the ns_ancestors fallback bound this State to + the package directory). + """ + tree = self.tree + if tree is None or tree.is_stub or tree.defs or "__getattr__" in tree.names: + return + if self.id not in self.manager.find_module_cache.ns_ancestors: + return + # If path is a directory and `__init__.py` exists, we know the file + # wasn't read. If `--follow-untyped-imports` was used, path would be the + # `__init__.py` file itself, in which case we don't want to interfere. + if not self.path or not self.manager.fscache.isdir(self.path): + return + if not ( + self.manager.fscache.isfile(os_path_join(self.path, "__init__.py")) + or self.manager.fscache.isfile(os_path_join(self.path, "__init__.pyi")) + ): + return + var = Var("__getattr__", type=AnyType(TypeOfAny.from_unimported_type)) + tree.names["__getattr__"] = SymbolTableNode( + GDEF, var, module_public=False, module_hidden=True + ) + def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None = None) -> None: """Parse file and run first pass of semantic analysis. 
@@ -3311,6 +3342,7 @@ def semantic_analysis_pass1(self) -> None: analyzer.visit_file(self.tree, self.xpath, self.id, options) # TODO: Do this while constructing the AST? self.tree.names = SymbolTable() + self._maybe_inject_synthetic_getattr_for_typed_subpackage() if not self.tree.is_stub: if not self.options.allow_redefinition: # Perform some low-key variable renaming when assignments can't diff --git a/mypy/modulefinder.py b/mypy/modulefinder.py index baa521afbb122..f109df40fc4a6 100644 --- a/mypy/modulefinder.py +++ b/mypy/modulefinder.py @@ -196,6 +196,9 @@ def __init__( self.initial_components: dict[tuple[str, ...], dict[str, list[str]]] = {} # Cache find_module: id -> result self.results: dict[str, ModuleSearchResult] = {} + # Ancestor packages reached only because a descendant ships py.typed. + # Maps id -> directory; consulted as a fallback in `_find_module` so + # those parents are findable too. self.ns_ancestors: dict[str, str] = {} self.options = options custom_typeshed_dir = None diff --git a/test-data/packages/untypedpkg_w_typed_sub/pyproject.toml b/test-data/packages/untypedpkg_w_typed_sub/pyproject.toml new file mode 100644 index 0000000000000..88b8839751eb2 --- /dev/null +++ b/test-data/packages/untypedpkg_w_typed_sub/pyproject.toml @@ -0,0 +1,11 @@ +[project] +name = 'untypedpkg_w_typed_sub' +version = '0.1' +description = 'test' + +[tool.hatch.build] +include = ["**/*.py", "**/*.pyi", "**/py.typed"] + +[build-system] +requires = ["hatchling==1.18"] +build-backend = "hatchling.build" diff --git a/test-data/packages/untypedpkg_w_typed_sub/untypedpkg_w_typed_sub/__init__.py b/test-data/packages/untypedpkg_w_typed_sub/untypedpkg_w_typed_sub/__init__.py new file mode 100644 index 0000000000000..e877cd2500382 --- /dev/null +++ b/test-data/packages/untypedpkg_w_typed_sub/untypedpkg_w_typed_sub/__init__.py @@ -0,0 +1 @@ +from untypedpkg_w_typed_sub.sibling import re_exported diff --git 
a/test-data/packages/untypedpkg_w_typed_sub/untypedpkg_w_typed_sub/sibling.py b/test-data/packages/untypedpkg_w_typed_sub/untypedpkg_w_typed_sub/sibling.py new file mode 100644 index 0000000000000..d26ec8a4e278a --- /dev/null +++ b/test-data/packages/untypedpkg_w_typed_sub/untypedpkg_w_typed_sub/sibling.py @@ -0,0 +1 @@ +def re_exported() -> int: ... diff --git a/test-data/packages/untypedpkg_w_typed_sub/untypedpkg_w_typed_sub/typed_sub/__init__.py b/test-data/packages/untypedpkg_w_typed_sub/untypedpkg_w_typed_sub/typed_sub/__init__.py new file mode 100644 index 0000000000000..283734b3e8ec9 --- /dev/null +++ b/test-data/packages/untypedpkg_w_typed_sub/untypedpkg_w_typed_sub/typed_sub/__init__.py @@ -0,0 +1 @@ +EXAMPLE_CONST: int = 42 diff --git a/test-data/packages/untypedpkg_w_typed_sub/untypedpkg_w_typed_sub/typed_sub/py.typed b/test-data/packages/untypedpkg_w_typed_sub/untypedpkg_w_typed_sub/typed_sub/py.typed new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/test-data/unit/pep561.test b/test-data/unit/pep561.test index a913dacc37e0a..b249ef09cb969 100644 --- a/test-data/unit/pep561.test +++ b/test-data/unit/pep561.test @@ -248,3 +248,63 @@ import typedpkg_ns.b # type: ignore import typedpkg_ns.a [out] [out2] + +[case testUntypedParentWithTypedSubDoesNotBreakReExports] +# pkgs: untypedpkg_w_typed_sub +# Regression: untyped parent with a typed subpackage. The parent gets a +# synthetic __getattr__ so unresolved attrs return Any (no spurious +# attr-defined), but real submodules still win in attribute lookup so +# dotted access through the parent stays precisely typed. 
+import other +from untypedpkg_w_typed_sub import re_exported # type: ignore[import-untyped] +reveal_type(re_exported) +[file other.py] +import untypedpkg_w_typed_sub.typed_sub # type: ignore[import-untyped] +import untypedpkg_w_typed_sub.typed_sub as ts # type: ignore[import-untyped] +from untypedpkg_w_typed_sub import typed_sub # type: ignore[import-untyped] +from untypedpkg_w_typed_sub.typed_sub import EXAMPLE_CONST # type: ignore[import-untyped] +reveal_type(untypedpkg_w_typed_sub.typed_sub.EXAMPLE_CONST) +reveal_type(ts.EXAMPLE_CONST) +reveal_type(typed_sub.EXAMPLE_CONST) +reveal_type(EXAMPLE_CONST) +[out] +other.py:5: note: Revealed type is "int" +other.py:6: note: Revealed type is "int" +other.py:7: note: Revealed type is "int" +other.py:8: note: Revealed type is "int" +testUntypedParentWithTypedSubDoesNotBreakReExports.py:8: note: Revealed type is "Any" + +[case testUntypedParentWithTypedSubFollowUntypedImports] +# pkgs: untypedpkg_w_typed_sub +# flags: --follow-untyped-imports +# When the user opts into precise untyped-import analysis, mypy parses the +# parent's __init__.py for real -- so the re-export gets its precise type +# (not Any), and unknown attributes correctly raise attr-defined. The +# synthetic __getattr__ fix stays dormant in this mode. 
+import untypedpkg_w_typed_sub +import untypedpkg_w_typed_sub.typed_sub +from untypedpkg_w_typed_sub import re_exported +reveal_type(re_exported) +reveal_type(untypedpkg_w_typed_sub.typed_sub.EXAMPLE_CONST) +untypedpkg_w_typed_sub.nonexistent +[out] +testUntypedParentWithTypedSubFollowUntypedImports.py:10: note: Revealed type is "def () -> int" +testUntypedParentWithTypedSubFollowUntypedImports.py:11: note: Revealed type is "int" +testUntypedParentWithTypedSubFollowUntypedImports.py:12: error: Module has no attribute "nonexistent" + +[case testUntypedParentWithTypedSubGetattrIsHidden] +# pkgs: untypedpkg_w_typed_sub +# The synthetic __getattr__ must not leak as a user-visible attribute or +# satisfy `from pkg import __getattr__`. Direct access to `pkg.__getattr__` +# should fall through to `types.ModuleType.__getattr__` -- the same answer +# a plain untyped module would give. +import untypedpkg_w_typed_sub.typed_sub # type: ignore[import-untyped] +import untypedpkg_w_typed_sub # type: ignore[import-untyped] +reveal_type(untypedpkg_w_typed_sub.__getattr__) +from untypedpkg_w_typed_sub import __getattr__ as g # type: ignore[import-untyped] +reveal_type(g) +[out] +testUntypedParentWithTypedSubGetattrIsHidden.py:8: note: Revealed type is "def (str) -> Any" +testUntypedParentWithTypedSubGetattrIsHidden.py:9: error: Module "untypedpkg_w_typed_sub" does not explicitly export attribute "__getattr__" +testUntypedParentWithTypedSubGetattrIsHidden.py:9: note: Error code "attr-defined" not covered by "type: ignore[import-untyped]" comment +testUntypedParentWithTypedSubGetattrIsHidden.py:10: note: Revealed type is "Any"