diff --git a/ddtrace/internal/debug.py b/ddtrace/internal/debug.py index e3c1cd181d8..c70142baeae 100644 --- a/ddtrace/internal/debug.py +++ b/ddtrace/internal/debug.py @@ -75,7 +75,7 @@ def collect(tracer): is_venv = in_venv() - packages_available = {p.name: p.version for p in get_distributions()} + packages_available = dict(get_distributions()) integration_configs = {} # type: Dict[str, Union[Dict[str, Any], str]] for module, enabled in ddtrace._monkey.PATCH_MODULES.items(): # TODO: this check doesn't work in all cases... we need a mapping diff --git a/ddtrace/internal/packages.py b/ddtrace/internal/packages.py index cbadd5168c1..7f0a37ae87b 100644 --- a/ddtrace/internal/packages.py +++ b/ddtrace/internal/packages.py @@ -3,7 +3,6 @@ from functools import singledispatch import inspect import logging -from os import fspath # noqa:F401 import sys import sysconfig from types import ModuleType @@ -17,32 +16,30 @@ LOG = logging.getLogger(__name__) +Distribution = t.NamedTuple("Distribution", [("name", str), ("version", str)]) -Distribution = t.NamedTuple("Distribution", [("name", str), ("version", str), ("path", t.Optional[str])]) _PACKAGE_DISTRIBUTIONS: t.Optional[t.Mapping[str, t.List[str]]] = None @callonce -def get_distributions(): - # type: () -> t.Set[Distribution] - """returns the name and version of all distributions in a python path""" +def get_distributions() -> t.Mapping[str, str]: + """returns the mapping from distribution name to version for all distributions in a python path""" try: import importlib.metadata as importlib_metadata except ImportError: import importlib_metadata # type: ignore[no-redef] - pkgs = set() + pkgs = {} for dist in importlib_metadata.distributions(): - # Get the root path of all files in a distribution - path = str(dist.locate_file("")) # PKG-INFO and/or METADATA files are parsed when dist.metadata is accessed # Optimization: we should avoid accessing dist.metadata more than once metadata = 
dist.metadata name = metadata["name"] version = metadata["version"] if name and version: - pkgs.add(Distribution(path=path, name=name.lower(), version=version)) + # Lower-case inside the guard so a distribution with no name is skipped instead of raising on .lower() + pkgs[name.lower()] = version return pkgs @@ -68,26 +65,18 @@ def get_package_distributions() -> t.Mapping[str, t.List[str]]: def get_module_distribution_versions(module_name: str) -> t.Optional[t.Tuple[str, str]]: if not module_name: return None - try: - import importlib.metadata as importlib_metadata - except ImportError: - import importlib_metadata # type: ignore[no-redef] names: t.List[str] = [] pkgs = get_package_distributions() + dist_map = get_distributions() while names == []: - try: - package = importlib_metadata.distribution(module_name) - metadata = package.metadata - name = metadata["name"] - version = metadata["version"] - if name and version: - return (name, version) - except Exception: # nosec - pass + # First try to resolve the module name from package distributions + version = dist_map.get(module_name) + if version: + return (module_name, version) + # Since we've failed to resolve, try to resolve the parent package names = pkgs.get(module_name, []) if not names: - # try to resolve the parent package p = module_name.rfind(".") if p > 0: module_name = module_name[:p] @@ -100,7 +89,7 @@ def get_module_distribution_versions(module_name: str) -> t.Optional[t.Tuple[str return (names[0], get_version_for_package(names[0])) -@cached(maxsize=256) +@cached(maxsize=1024) def get_version_for_package(name): # type: (str) -> str """returns the version of a package""" @@ -194,7 +183,7 @@ def is_namespace(f: importlib_metadata.PackagePath): if not (files := dist.files): continue metadata = dist.metadata - d = Distribution(name=metadata["name"], version=metadata["version"], path=None) + d = Distribution(name=metadata["name"], version=metadata["version"]) for f in files: root = f.parts[0] if root.endswith(".dist-info") or root.endswith(".egg-info") or root == "..": diff --git 
a/releasenotes/notes/perf-telemetry-d9881d20f22013f7.yaml b/releasenotes/notes/perf-telemetry-d9881d20f22013f7.yaml new file mode 100644 index 00000000000..d946f8990c8 --- /dev/null +++ b/releasenotes/notes/perf-telemetry-d9881d20f22013f7.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + telemetry: improves periodic telemetry writer performance by removing + unnecessary calls to ``importlib.metadata`` for reporting imported dependencies. + diff --git a/tests/internal/test_packages.py b/tests/internal/test_packages.py index 3e41ecddc3b..9a5d3a9160f 100644 --- a/tests/internal/test_packages.py +++ b/tests/internal/test_packages.py @@ -1,5 +1,3 @@ -import os - import pytest from ddtrace.internal.packages import _third_party_packages @@ -40,28 +38,26 @@ def test_get_distributions(): pkg_resources_ws = {pkg.project_name.lower() for pkg in pkg_resources.working_set} importlib_pkgs = set() - for pkg in get_distributions(): - assert pkg.name - assert pkg.version - assert os.path.exists(pkg.path) + for name, version in get_distributions().items(): + assert version # The package name in typing_extensions-4.x.x.dist-info/METADATA is set to `typing_extensions` # this is inconsistent with the package name found in pkg_resources. The block below corrects this. # The correct package name is typing-extensions. # The issue exists in pkgutil-resolve-name package. 
- if pkg.name == "typing_extensions" and "typing-extensions" in pkg_resources_ws: + if name == "typing_extensions" and "typing-extensions" in pkg_resources_ws: importlib_pkgs.add("typing-extensions") - elif pkg.name == "pkgutil_resolve_name" and "pkgutil-resolve-name" in pkg_resources_ws: + elif name == "pkgutil_resolve_name" and "pkgutil-resolve-name" in pkg_resources_ws: importlib_pkgs.add("pkgutil-resolve-name") - elif pkg.name == "importlib_metadata" and "importlib-metadata" in pkg_resources_ws: + elif name == "importlib_metadata" and "importlib-metadata" in pkg_resources_ws: importlib_pkgs.add("importlib-metadata") - elif pkg.name == "importlib-metadata" and "importlib_metadata" in pkg_resources_ws: + elif name == "importlib-metadata" and "importlib_metadata" in pkg_resources_ws: importlib_pkgs.add("importlib_metadata") - elif pkg.name == "importlib-resources" and "importlib_resources" in pkg_resources_ws: + elif name == "importlib-resources" and "importlib_resources" in pkg_resources_ws: importlib_pkgs.add("importlib_resources") - elif pkg.name == "importlib_resources" and "importlib-resources" in pkg_resources_ws: + elif name == "importlib_resources" and "importlib-resources" in pkg_resources_ws: importlib_pkgs.add("importlib-resources") else: - importlib_pkgs.add(pkg.name) + importlib_pkgs.add(name) # assert that pkg_resources and importlib.metadata return the same packages assert pkg_resources_ws == importlib_pkgs