Skip to content

perf(telemetry): remove call to importlib.metadata from get_module_distribution_versions #13278

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Apr 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ddtrace/internal/debug.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def collect(tracer):

is_venv = in_venv()

packages_available = {p.name: p.version for p in get_distributions()}
packages_available = {name: version for (name, version) in get_distributions().items()}
integration_configs = {} # type: Dict[str, Union[Dict[str, Any], str]]
for module, enabled in ddtrace._monkey.PATCH_MODULES.items():
# TODO: this check doesn't work in all cases... we need a mapping
Expand Down
40 changes: 14 additions & 26 deletions ddtrace/internal/packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from functools import singledispatch
import inspect
import logging
from os import fspath # noqa:F401
import sys
import sysconfig
from types import ModuleType
Expand All @@ -17,32 +16,29 @@

LOG = logging.getLogger(__name__)

Distribution = t.NamedTuple("Distribution", [("name", str), ("version", str)])

Distribution = t.NamedTuple("Distribution", [("name", str), ("version", str), ("path", t.Optional[str])])

_PACKAGE_DISTRIBUTIONS: t.Optional[t.Mapping[str, t.List[str]]] = None


@callonce
def get_distributions():
# type: () -> t.Set[Distribution]
"""returns the name and version of all distributions in a python path"""
def get_distributions() -> t.Mapping[str, str]:
"""returns the mapping from distribution name to version for all distributions in a python path"""
try:
import importlib.metadata as importlib_metadata
except ImportError:
import importlib_metadata # type: ignore[no-redef]

pkgs = set()
pkgs = {}
for dist in importlib_metadata.distributions():
# Get the root path of all files in a distribution
path = str(dist.locate_file(""))
# PKG-INFO and/or METADATA files are parsed when dist.metadata is accessed
# Optimization: we should avoid accessing dist.metadata more than once
metadata = dist.metadata
name = metadata["name"]
name = metadata["name"].lower()
version = metadata["version"]
if name and version:
pkgs.add(Distribution(path=path, name=name.lower(), version=version))
pkgs[name] = version

return pkgs

Expand All @@ -68,26 +64,18 @@ def get_package_distributions() -> t.Mapping[str, t.List[str]]:
def get_module_distribution_versions(module_name: str) -> t.Optional[t.Tuple[str, str]]:
if not module_name:
return None
try:
import importlib.metadata as importlib_metadata
except ImportError:
import importlib_metadata # type: ignore[no-redef]

names: t.List[str] = []
pkgs = get_package_distributions()
dist_map = get_distributions()
while names == []:
try:
package = importlib_metadata.distribution(module_name)
metadata = package.metadata
name = metadata["name"]
version = metadata["version"]
if name and version:
return (name, version)
except Exception: # nosec
pass
# First try to resolve the module name from package distributions
version = dist_map.get(module_name)
if version:
return (module_name, version)
# Since we've failed to resolve, try to resolve the parent package
names = pkgs.get(module_name, [])
if not names:
# try to resolve the parent package
p = module_name.rfind(".")
if p > 0:
module_name = module_name[:p]
Expand All @@ -100,7 +88,7 @@ def get_module_distribution_versions(module_name: str) -> t.Optional[t.Tuple[str
return (names[0], get_version_for_package(names[0]))


@cached(maxsize=256)
@cached(maxsize=1024)
def get_version_for_package(name):
# type: (str) -> str
"""returns the version of a package"""
Expand Down Expand Up @@ -194,7 +182,7 @@ def is_namespace(f: importlib_metadata.PackagePath):
if not (files := dist.files):
continue
metadata = dist.metadata
d = Distribution(name=metadata["name"], version=metadata["version"], path=None)
d = Distribution(name=metadata["name"], version=metadata["version"])
for f in files:
root = f.parts[0]
if root.endswith(".dist-info") or root.endswith(".egg-info") or root == "..":
Expand Down
6 changes: 6 additions & 0 deletions releasenotes/notes/perf-telemetry-d9881d20f22013f7.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
fixes:
- |
telemetry: improves periodic telemetry writer performance by removing
unnecessary calls to ``importlib.metadata`` for reporting imported dependencies.

22 changes: 9 additions & 13 deletions tests/internal/test_packages.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import os

import pytest

from ddtrace.internal.packages import _third_party_packages
Expand Down Expand Up @@ -40,28 +38,26 @@ def test_get_distributions():
pkg_resources_ws = {pkg.project_name.lower() for pkg in pkg_resources.working_set}

importlib_pkgs = set()
for pkg in get_distributions():
assert pkg.name
assert pkg.version
assert os.path.exists(pkg.path)
for name, version in get_distributions().items():
assert version
# The package name in typing_extensions-4.x.x.dist-info/METADATA is set to `typing_extensions`,
# which is inconsistent with the package name found in pkg_resources. The block below corrects this.
# The correct package name is typing-extensions.
# The same issue exists in the pkgutil-resolve-name package.
if pkg.name == "typing_extensions" and "typing-extensions" in pkg_resources_ws:
if name == "typing_extensions" and "typing-extensions" in pkg_resources_ws:
importlib_pkgs.add("typing-extensions")
elif pkg.name == "pkgutil_resolve_name" and "pkgutil-resolve-name" in pkg_resources_ws:
elif name == "pkgutil_resolve_name" and "pkgutil-resolve-name" in pkg_resources_ws:
importlib_pkgs.add("pkgutil-resolve-name")
elif pkg.name == "importlib_metadata" and "importlib-metadata" in pkg_resources_ws:
elif name == "importlib_metadata" and "importlib-metadata" in pkg_resources_ws:
importlib_pkgs.add("importlib-metadata")
elif pkg.name == "importlib-metadata" and "importlib_metadata" in pkg_resources_ws:
elif name == "importlib-metadata" and "importlib_metadata" in pkg_resources_ws:
importlib_pkgs.add("importlib_metadata")
elif pkg.name == "importlib-resources" and "importlib_resources" in pkg_resources_ws:
elif name == "importlib-resources" and "importlib_resources" in pkg_resources_ws:
importlib_pkgs.add("importlib_resources")
elif pkg.name == "importlib_resources" and "importlib-resources" in pkg_resources_ws:
elif name == "importlib_resources" and "importlib-resources" in pkg_resources_ws:
importlib_pkgs.add("importlib-resources")
else:
importlib_pkgs.add(pkg.name)
importlib_pkgs.add(name)

# assert that pkg_resources and importlib.metadata return the same packages
assert pkg_resources_ws == importlib_pkgs
Expand Down
Loading