Skip to content

Commit be9d4c7

Browse files
committed
Resolve wildcard imports via __all__ or public-names rule (#126)
`from pkg import *` is now desugared at analysis time against the target package's exports, so names reached via wildcard — including those re-exported through __init__.py — appear as concrete edges instead of as spurious *.* residue at the importer's module level. Export determination: - Literal __all__ = ["a", "b"] (list or tuple, string elements only) is authoritative, including leading-underscore names if listed. - Absent __all__: public-names rule — every module-scope binding whose identifier does not start with underscore. - Non-literal forms (augmented assignment, dynamic construction) fall back to the public-names rule with a debug log. Implementation: - New self.module_all dict populated from AST by _extract_dunder_all. - New _module_public_exports helper with short-name → FQ-name candidate resolution, mirroring _record_import. - visit_ImportFrom desugars wildcard aliases against the export set before the per-name binding loop. - New prescan phase in CallGraphVisitor.process populates scopes and __all__ for every input file before pass 1, making wildcard desugaring order-independent (consumers of a wildcard import no longer need to appear after the exporting package in the filename list). Version bumped to 2.5.0-dev — this is a new-feature change, not a patch.
1 parent 6e093de commit be9d4c7

9 files changed

Lines changed: 271 additions & 4 deletions

File tree

CHANGELOG.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
11
# Changelog
22

3-
## 2.4.4 (in progress)
3+
## 2.5.0 (in progress)
44

5-
*No user-visible changes yet.*
5+
### New features
6+
7+
- **Wildcard imports now resolve to actual targets.** `from pkg import *` is desugared at analysis time against the target package's `__all__` when declared as a literal list/tuple of strings, and against the public-names rule (every module-scope name not starting with `_`) otherwise. Names reached via wildcard — including those re-exported through `__init__.py` — now appear as concrete edges in the call graph instead of as spurious `*.*` residue at the importer's module level. Non-literal `__all__` forms (augmented assignment, dynamic construction) fall back to the public-names rule with a debug log. (#126)
8+
9+
### Internal
10+
11+
- **Prescan phase added before the two visitor passes.** `CallGraphVisitor.process` now does a lightweight scope + `__all__` walk over every input file up front, so cross-module metadata is fully populated before pass 1. This makes wildcard desugaring order-independent — the consumer of a wildcard import no longer has to appear after the exporting package in the filename list.
612

713

814
---

pyan/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python3
22

3-
__version__ = "2.4.4-dev"
3+
__version__ = "2.5.0-dev"
44

55
from .main import create_callgraph, main # noqa: F401
66
from .modvis import create_modulegraph # noqa: F401

pyan/analyzer.py

Lines changed: 157 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,13 @@ def _init_common(self, logger):
156156
# Used by expand_unknowns to constrain wildcard expansion.
157157
self.namespace_imports = {} # e.g. {"mymod.func": {"os", "foo.bar"}}
158158

159+
# module name → set of names exposed by `from module import *`.
160+
# Modules whose __all__ is absent, or present only in a form we don't
161+
# parse (augmented, dynamic, etc.), have no entry here — callers fall
162+
# back to the public-names rule via ``_module_public_exports``.
163+
# Populated by _extract_dunder_all when a literal __all__ assignment is seen.
164+
self.module_all = {} # e.g. {"pkg": {"fn1", "fn2"}}
165+
159166
# current context for analysis
160167
self.module_name = None
161168
self.filename = None
@@ -167,6 +174,12 @@ def _init_common(self, logger):
167174

168175
def process(self):
169176
"""Analyze the set of files, twice so that any forward-references are picked up."""
177+
# Prescan: populate self.scopes and self.module_all for every file
178+
# before the main visitor passes. This lets cross-module lookups
179+
# (wildcard desugaring, chiefly) succeed in pass 1 regardless of
180+
# the order in which filenames were given.
181+
for filename in self.filenames:
182+
self._prescan_one(filename)
170183
for pas in range(2):
171184
for filename in self.filenames:
172185
self.logger.info(f"========== pass {pas + 1}, file '{filename}' ==========")
@@ -175,6 +188,31 @@ def process(self):
175188
self.resolve_base_classes() # must be done only after all files seen
176189
self.postprocess()
177190

191+
def _prescan_one(self, filename):
192+
"""Populate self.scopes and self.module_all for a single file.
193+
194+
Reads content, runs symtable-based scope analysis, and extracts any
195+
literal ``__all__``. Does **not** run the main AST visitor — this is
196+
strictly metadata collection so that cross-module lookups during the
197+
subsequent visitor passes find what they need.
198+
"""
199+
if hasattr(self, "_source_texts"):
200+
content = self._source_texts[filename]
201+
display_name = filename.removesuffix(".__init__") if filename.endswith(".__init__") else filename
202+
else:
203+
with open(filename, encoding="utf-8") as f:
204+
content = f.read()
205+
display_name = get_module_name(filename, root=self.root)
206+
207+
saved_module_name = self.module_name
208+
self.module_name = display_name
209+
try:
210+
self.analyze_scopes(content, filename)
211+
tree = ast.parse(content, filename)
212+
self._extract_dunder_all(tree.body)
213+
finally:
214+
self.module_name = saved_module_name
215+
178216
def process_one(self, filename):
179217
"""Analyze one source unit (file path, or module name in source mode).
180218
@@ -593,12 +631,73 @@ def visit_Module(self, node):
593631
module_node = self.get_node("", self.module_name, node, flavor=Flavor.MODULE)
594632
self.associate_node(module_node, node, filename=self.filename)
595633

634+
# Extract __all__ so that `from module import *` can desugar against it
635+
# (or against the public-names rule when absent). Done at module entry,
636+
# before visiting children, so the data is available if this module is
637+
# itself visited later during the same pass — see visit_ImportFrom.
638+
self._extract_dunder_all(node.body)
639+
596640
with self._module_scope(self.module_name):
597641
self.generic_visit(node) # visit the **children** of node
598642

599643
if self.add_defines_edge(module_node, None):
600644
self.logger.info(f"Def Module {node}")
601645

646+
def _extract_dunder_all(self, module_body):
647+
"""Record the current module's ``__all__`` in ``self.module_all``.
648+
649+
Parses only the simple literal forms::
650+
651+
__all__ = ["a", "b"]
652+
__all__ = ("a", "b")
653+
__all__: list[str] = ["a", "b"] # PEP 526 annotated assignment
654+
655+
Anything else — augmented assignment (``__all__ += [...]``), calls
656+
(``__all__ = _compute()``), or non-string elements — is skipped with
657+
a debug log, leaving callers to fall back to the public-names rule.
658+
659+
Multiple top-level ``__all__`` assignments: the last one wins, matching
660+
Python's own binding semantics.
661+
"""
662+
names = None
663+
saw_unparseable = False
664+
for stmt in module_body:
665+
target_value = None
666+
if isinstance(stmt, ast.Assign):
667+
for tgt in stmt.targets:
668+
if isinstance(tgt, ast.Name) and tgt.id == "__all__":
669+
target_value = stmt.value
670+
break
671+
elif isinstance(stmt, ast.AnnAssign):
672+
if (isinstance(stmt.target, ast.Name)
673+
and stmt.target.id == "__all__"
674+
and stmt.value is not None):
675+
target_value = stmt.value
676+
elif isinstance(stmt, ast.AugAssign):
677+
if isinstance(stmt.target, ast.Name) and stmt.target.id == "__all__":
678+
saw_unparseable = True
679+
continue
680+
681+
if target_value is None:
682+
continue
683+
684+
if isinstance(target_value, (ast.List, ast.Tuple)) and all(
685+
isinstance(e, ast.Constant) and isinstance(e.value, str)
686+
for e in target_value.elts
687+
):
688+
names = {e.value for e in target_value.elts}
689+
else:
690+
saw_unparseable = True
691+
names = None # discard any prior literal — last assignment wins
692+
693+
if saw_unparseable and names is None:
694+
self.logger.debug(
695+
f"__all__ in module '{self.module_name}' uses a form pyan does not parse "
696+
f"(augmented/dynamic); falling back to the public-names rule"
697+
)
698+
if names is not None:
699+
self.module_all[self.module_name] = names
700+
602701
def visit_ClassDef(self, node):
603702
self.logger.debug(f"ClassDef {node.name}, {self.filename}:{node.lineno}")
604703

@@ -943,8 +1042,25 @@ def visit_ImportFrom(self, node):
9431042

9441043
self._record_import(tgt_name)
9451044

1045+
# Desugar `from tgt_name import *` against the target module's public
1046+
# exports (__all__ if literal, otherwise the public-names rule). When
1047+
# driven through process(), the prescan has already collected that metadata
1048+
# for every input file, so file order doesn't matter. If the target wasn't
1049+
# analyzed, we leave the wildcard alone; expand_unknowns will handle what it can.
1050+
names = node.names
1051+
if len(names) == 1 and names[0].name == "*":
1052+
exports = self._module_public_exports(tgt_name)
1053+
if exports is not None:
1054+
self.logger.debug(
1055+
f"Desugaring 'from {tgt_name} import *' to {sorted(exports)}, "
1056+
f"{self.filename}:{node.lineno}"
1057+
)
1058+
# Synthesize aliases for each exported name. asname=None because
1059+
# star-import can't rebind (there's no `as` clause on `*`).
1060+
names = [ast.alias(name=n, asname=None) for n in exports]
1061+
9461062
# link each import separately
947-
for alias in node.names:
1063+
for alias in names:
9481064
# check if import is module
9491065
if tgt_name + "." + alias.name in self.module_to_filename:
9501066
to_node = self.get_node("", tgt_name + "." + alias.name, node, flavor=Flavor.MODULE)
@@ -959,6 +1075,46 @@ def visit_ImportFrom(self, node):
9591075
if self.add_uses_edge(from_node, to_node):
9601076
self.logger.info(f"New edge added for Use from {from_node} to ImportFrom {to_node}")
9611077

1078+
def _module_public_exports(self, module_name):
1079+
"""Return the set of names exposed by ``from module_name import *``.
1080+
1081+
If the module declares a literal ``__all__`` (see ``_extract_dunder_all``),
1082+
that set is authoritative — even a leading-underscore name counts if listed.
1083+
Otherwise the public-names rule applies: every name bound at module scope
1084+
whose identifier does not start with an underscore.
1085+
1086+
*module_name* may be a short name from an import statement
1087+
(``"common"``) or a fully qualified one (``"pkg.sub.common"``); both are
1088+
resolved against the analyzer's known module set, mirroring what
1089+
``_record_import`` does for ``namespace_imports``.
1090+
1091+
Returns ``None`` when no matching analyzed module is found.
1092+
Callers are expected to fall back to the current wildcard-IMPORTEDITEM
1093+
behavior in that case.
1094+
"""
1095+
for candidate in self._module_name_candidates(module_name):
1096+
if candidate in self.module_all:
1097+
return self.module_all[candidate]
1098+
scope = self.scopes.get(candidate)
1099+
if scope is not None:
1100+
return {n for n in scope.defs if not n.startswith("_")}
1101+
return None
1102+
1103+
def _module_name_candidates(self, module_name):
1104+
"""Yield fully qualified module names that could match *module_name*.
1105+
1106+
First yields *module_name* itself (handles the fully qualified case),
1107+
then any analyzed module whose FQ name ends in ``"." + module_name``
1108+
(handles short names from absolute imports where the project root adds
1109+
extra prefix components, e.g. ``from common import *`` matching
1110+
``tests.fixtures.common``).
1111+
"""
1112+
yield module_name
1113+
suffix = "." + module_name
1114+
for fq_name in self.module_to_filename:
1115+
if fq_name.endswith(suffix) and fq_name != module_name:
1116+
yield fq_name
1117+
9621118
def analyze_module_import(self, import_item, ast_node):
9631119
"""Analyze a names AST node inside an Import or ImportFrom AST node.
9641120
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from pkg_with_all import * # noqa: F401, F403
2+
from pkg_private import * # noqa: F401, F403
3+
4+
5+
def use_with_all():
6+
alpha() # noqa: F405 — from pkg_with_all's __all__
7+
_helper() # noqa: F405 — leading underscore, but listed in __all__
8+
9+
10+
def use_private():
11+
pub() # noqa: F405 — non-underscore, brought in by public-names rule
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# No __all__ — public-names rule applies: wildcard brings in everything
2+
# whose name does not start with an underscore.
3+
from .exports import pub, _priv # noqa: F401
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
def pub():
2+
return "public"
3+
4+
5+
def _priv():
6+
return "private"
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Literal __all__ — only these names are re-exported by `import *`,
2+
# even though the module binds more.
3+
from .exports import alpha, beta, gamma, _helper # noqa: F401
4+
5+
__all__ = ["alpha", "_helper"]
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
def alpha():
2+
return "alpha"
3+
4+
5+
def beta():
6+
return "beta"
7+
8+
9+
def gamma():
10+
return "gamma"
11+
12+
13+
def _helper():
14+
return "helper"

tests/test_regressions.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,3 +329,69 @@ def test_issue126_wildcard_import_resolves_to_submodule():
329329
fn_parent = f"{ISSUE126_PREFIX}.test_sample.fn_parent"
330330
uses = get_in_dict(v.uses_edges, fn_parent)
331331
get_node(uses, f"{ISSUE126_PREFIX}.common.file3.fn3")
332+
333+
334+
def test_issue126_no_spurious_wildcard_edge_at_module_level():
    """With wildcard desugaring in place (v2.5), the module that does
    ``from common import *`` must not carry a leftover ``*.*`` uses edge."""
    visitor = _issue126_visitor()
    importer = f"{ISSUE126_PREFIX}.test_sample"
    targets = {used.get_name() for used in get_in_dict(visitor.uses_edges, importer)}
    assert "*.*" not in targets, f"unexpected wildcard residue: {sorted(targets)}"
341+
342+
343+
# --- Wildcard imports: __all__ vs. public-names rule ---
344+
345+
DUNDER_ALL_DIR = os.path.join(TESTS_DIR, "test_code/dunder_all")
346+
DUNDER_ALL_PREFIX = "test_code.dunder_all"
347+
348+
349+
def _dunder_all_visitor():
    """Build a ``CallGraphVisitor`` over every ``.py`` file found recursively
    under ``DUNDER_ALL_DIR``, in sorted filename order, rooted at ``TESTS_DIR``."""
    from glob import glob as globfunc

    pattern = os.path.join(DUNDER_ALL_DIR, "**/*.py")
    fixture_files = globfunc(pattern, recursive=True)
    fixture_files.sort()
    return CallGraphVisitor(fixture_files, root=TESTS_DIR, logger=logging.getLogger())
354+
355+
356+
def test_dunder_all_literal_governs_wildcard():
    """The package declares ``__all__ = ["alpha", "_helper"]``.  A consumer
    doing ``from pkg_with_all import *`` and then calling ``alpha()`` and
    ``_helper()`` should get both resolved: the literal ``__all__`` is
    authoritative and overrides the underscore-is-private default."""
    visitor = _dunder_all_visitor()
    edges = get_in_dict(visitor.uses_edges, f"{DUNDER_ALL_PREFIX}.consumer.use_with_all")
    for exported in ("alpha", "_helper"):
        get_node(edges, f"{DUNDER_ALL_PREFIX}.pkg_with_all.exports.{exported}")
366+
367+
368+
def test_dunder_all_literal_excludes_unlisted_names():
    """``beta`` and ``gamma`` are bound in pkg_with_all's namespace but are
    not listed in its ``__all__``, so ``import *`` in the consumer must not
    bind them: ``use_with_all`` should have no uses edge to either."""
    visitor = _dunder_all_visitor()
    caller = f"{DUNDER_ALL_PREFIX}.consumer.use_with_all"
    reached = {node.get_name() for node in get_in_dict(visitor.uses_edges, caller)}
    for unlisted in ("beta", "gamma"):
        assert f"{DUNDER_ALL_PREFIX}.pkg_with_all.exports.{unlisted}" not in reached
379+
380+
381+
def test_public_names_rule_without_dunder_all():
    """When ``__all__`` is absent, wildcard brings in every non-underscore
    name bound at module scope.  ``pub`` should resolve; ``_priv`` should not
    — it's leading-underscore and not whitelisted.

    Both halves are asserted on the uses edges of ``consumer.use_private``:
    an edge to ``pub`` must exist, and no edge to ``_priv`` may appear.
    """
    v = _dunder_all_visitor()
    use = f"{DUNDER_ALL_PREFIX}.consumer.use_private"
    uses = get_in_dict(v.uses_edges, use)
    get_node(uses, f"{DUNDER_ALL_PREFIX}.pkg_private.exports.pub")
    # Negative half of the contract, mirroring the beta/gamma check in
    # test_dunder_all_literal_excludes_unlisted_names.
    targets = {n.get_name() for n in uses}
    assert f"{DUNDER_ALL_PREFIX}.pkg_private.exports._priv" not in targets
389+
390+
391+
def test_dunder_all_recorded_only_when_literal():
    """``module_all`` should hold pkg_with_all's literal ``__all__`` and have
    no entry for pkg_private, which has no ``__all__`` statement at all."""
    visitor = _dunder_all_visitor()
    with_all_key = f"{DUNDER_ALL_PREFIX}.pkg_with_all"
    private_key = f"{DUNDER_ALL_PREFIX}.pkg_private"
    assert with_all_key in visitor.module_all
    assert visitor.module_all[with_all_key] == {"alpha", "_helper"}
    assert private_key not in visitor.module_all

0 commit comments

Comments
 (0)