From 61fcecb3f2c86f18571794b4eac691bb44e7065e Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 25 Apr 2025 13:15:06 -0400 Subject: [PATCH 1/6] perf(telemetry): avoid invoking importlib.metadata --- ddtrace/internal/packages.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/ddtrace/internal/packages.py b/ddtrace/internal/packages.py index cbadd5168c1..76735a7c80a 100644 --- a/ddtrace/internal/packages.py +++ b/ddtrace/internal/packages.py @@ -76,6 +76,17 @@ def get_module_distribution_versions(module_name: str) -> t.Optional[t.Tuple[str names: t.List[str] = [] pkgs = get_package_distributions() while names == []: + # First try to resolve the module name from package distributions + names = pkgs.get(module_name, []) + if not names: + # try to resolve the parent package + p = module_name.rfind(".") + if p > 0: + module_name = module_name[:p] + else: + break + # If we failed to do so, then invoke the expensive importlib_metadata.distribution + # to get the package metadata try: package = importlib_metadata.distribution(module_name) metadata = package.metadata @@ -85,14 +96,6 @@ def get_module_distribution_versions(module_name: str) -> t.Optional[t.Tuple[str return (name, version) except Exception: # nosec pass - names = pkgs.get(module_name, []) - if not names: - # try to resolve the parent package - p = module_name.rfind(".") - if p > 0: - module_name = module_name[:p] - else: - break if len(names) != 1: # either it was not resolved due to multiple packages with the same name # or it's a multipurpose package (like '__pycache__') @@ -100,7 +103,7 @@ def get_module_distribution_versions(module_name: str) -> t.Optional[t.Tuple[str return (names[0], get_version_for_package(names[0])) -@cached(maxsize=256) +@cached(maxsize=1024) def get_version_for_package(name): # type: (str) -> str """returns the version of a package""" From 610fa76f4ff0b224a018e977b4de2ac2b28fa67c Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 25 Apr 2025 13:36:53 -0400 Subject: [PATCH 2/6] fixups --- ddtrace/internal/debug.py | 2 +- ddtrace/internal/packages.py | 35 +++++++++++---------------------- tests/internal/test_packages.py | 20 +++++++++---------- 3 files changed, 21 insertions(+), 36 deletions(-) diff --git a/ddtrace/internal/debug.py b/ddtrace/internal/debug.py index e3c1cd181d8..c70142baeae 100644 --- a/ddtrace/internal/debug.py +++ b/ddtrace/internal/debug.py @@ -75,7 +75,7 @@ def collect(tracer): is_venv = in_venv() - packages_available = {p.name: p.version for p in get_distributions()} + packages_available = {name: version for (name, version) in get_distributions().items()} integration_configs = {} # type: Dict[str, Union[Dict[str, Any], str]] for module, enabled in ddtrace._monkey.PATCH_MODULES.items(): # TODO: this check doesn't work in all cases... we need a mapping diff --git a/ddtrace/internal/packages.py b/ddtrace/internal/packages.py index 76735a7c80a..196d203e502 100644 --- a/ddtrace/internal/packages.py +++ b/ddtrace/internal/packages.py @@ -3,7 +3,6 @@ from functools import singledispatch import inspect import logging -from os import fspath # noqa:F401 import sys import sysconfig from types import ModuleType @@ -17,32 +16,29 @@ LOG = logging.getLogger(__name__) +Distribution = t.NamedTuple("Distribution", [("name", str), ("version", str)]) -Distribution = t.NamedTuple("Distribution", [("name", str), ("version", str), ("path", t.Optional[str])]) _PACKAGE_DISTRIBUTIONS: t.Optional[t.Mapping[str, t.List[str]]] = None @callonce -def get_distributions(): - # type: () -> t.Set[Distribution] - """returns the name and version of all distributions in a python path""" +def get_distributions() -> t.Mapping[str, str]: + """returns the mapping from distribution name to version for all distributions in a python path""" try: import importlib.metadata as importlib_metadata except ImportError: import importlib_metadata # type: ignore[no-redef] - pkgs = set() + pkgs = {} for dist in importlib_metadata.distributions(): - # Get the root path of all files in a distribution - path = str(dist.locate_file("")) # PKG-INFO and/or METADATA files are parsed when dist.metadata is accessed # Optimization: we should avoid accessing dist.metadata more than once metadata = dist.metadata - name = metadata["name"] + name = metadata["name"].lower() version = metadata["version"] if name and version: - pkgs.add(Distribution(path=path, name=name.lower(), version=version)) + pkgs[name] = version return pkgs @@ -68,13 +64,10 @@ def get_package_distributions() -> t.Mapping[str, t.List[str]]: def get_module_distribution_versions(module_name: str) -> t.Optional[t.Tuple[str, str]]: if not module_name: return None - try: - import importlib.metadata as importlib_metadata - except ImportError: - import importlib_metadata # type: ignore[no-redef] names: t.List[str] = [] pkgs = get_package_distributions() + dist_map = get_distributions() while names == []: # First try to resolve the module name from package distributions names = pkgs.get(module_name, []) @@ -85,16 +78,10 @@ def get_module_distribution_versions(module_name: str) -> t.Optional[t.Tuple[str module_name = module_name[:p] else: break - # If we failed to do so, then invoke the expensive importlib_metadata.distribution - # to get the package metadata try: - package = importlib_metadata.distribution(module_name) - metadata = package.metadata - name = metadata["name"] - version = metadata["version"] - if name and version: - return (name, version) - except Exception: # nosec + dist = dist_map[module_name] + return (dist.name, dist.version) + except Exception: pass if len(names) != 1: # either it was not resolved due to multiple packages with the same name @@ -197,7 +184,7 @@ def is_namespace(f: importlib_metadata.PackagePath): if not (files := dist.files): continue metadata = dist.metadata - d = Distribution(name=metadata["name"], version=metadata["version"], path=None) + d = Distribution(name=metadata["name"], version=metadata["version"]) for f in files: root = f.parts[0] if root.endswith(".dist-info") or root.endswith(".egg-info") or root == "..": diff --git a/tests/internal/test_packages.py b/tests/internal/test_packages.py index 3e41ecddc3b..e20507772b5 100644 --- a/tests/internal/test_packages.py +++ b/tests/internal/test_packages.py @@ -40,28 +40,26 @@ def test_get_distributions(): pkg_resources_ws = {pkg.project_name.lower() for pkg in pkg_resources.working_set} importlib_pkgs = set() - for pkg in get_distributions(): - assert pkg.name - assert pkg.version - assert os.path.exists(pkg.path) + for name, version in get_distributions().items(): + assert version # The package name in typing_extensions-4.x.x.dist-info/METADATA is set to `typing_extensions` # this is inconsistent with the package name found in pkg_resources. The block below corrects this. # The correct package name is typing-extensions. # The issue exists in pkgutil-resolve-name package. - if pkg.name == "typing_extensions" and "typing-extensions" in pkg_resources_ws: + if name == "typing_extensions" and "typing-extensions" in pkg_resources_ws: importlib_pkgs.add("typing-extensions") - elif pkg.name == "pkgutil_resolve_name" and "pkgutil-resolve-name" in pkg_resources_ws: + elif name == "pkgutil_resolve_name" and "pkgutil-resolve-name" in pkg_resources_ws: importlib_pkgs.add("pkgutil-resolve-name") - elif pkg.name == "importlib_metadata" and "importlib-metadata" in pkg_resources_ws: + elif name == "importlib_metadata" and "importlib-metadata" in pkg_resources_ws: importlib_pkgs.add("importlib-metadata") - elif pkg.name == "importlib-metadata" and "importlib_metadata" in pkg_resources_ws: + elif name == "importlib-metadata" and "importlib_metadata" in pkg_resources_ws: importlib_pkgs.add("importlib_metadata") - elif pkg.name == "importlib-resources" and "importlib_resources" in pkg_resources_ws: + elif name == "importlib-resources" and "importlib_resources" in pkg_resources_ws: importlib_pkgs.add("importlib_resources") - elif pkg.name == "importlib_resources" and "importlib-resources" in pkg_resources_ws: + elif name == "importlib_resources" and "importlib-resources" in pkg_resources_ws: importlib_pkgs.add("importlib-resources") else: - importlib_pkgs.add(pkg.name) + importlib_pkgs.add(name) # assert that pkg_resources and importlib.metadata return the same packages assert pkg_resources_ws == importlib_pkgs From a54cd53d3bb8682dcd483d13fcf3db3f3e961692 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 25 Apr 2025 13:39:14 -0400 Subject: [PATCH 3/6] fix format --- tests/internal/test_packages.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/internal/test_packages.py b/tests/internal/test_packages.py index e20507772b5..9a5d3a9160f 100644 --- a/tests/internal/test_packages.py +++ b/tests/internal/test_packages.py @@ -1,5 +1,3 @@ -import os - import pytest from ddtrace.internal.packages import _third_party_packages From 9363e666948aa9a33d31b62a1a3bc0691b4f02a3 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 25 Apr 2025 13:47:35 -0400 Subject: [PATCH 4/6] avoid try except --- ddtrace/internal/packages.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/ddtrace/internal/packages.py b/ddtrace/internal/packages.py index 196d203e502..969005d8961 100644 --- a/ddtrace/internal/packages.py +++ b/ddtrace/internal/packages.py @@ -78,11 +78,9 @@ def get_module_distribution_versions(module_name: str) -> t.Optional[t.Tuple[str module_name = module_name[:p] else: break - try: - dist = dist_map[module_name] - return (dist.name, dist.version) - except Exception: - pass + version = dist_map.get(module_name) + if version: + return (module_name, version) if len(names) != 1: # either it was not resolved due to multiple packages with the same name # or it's a multipurpose package (like '__pycache__') From 2c264ec5b6ddfb27d705d749dab94ed5d46ee283 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 25 Apr 2025 15:20:54 -0400 Subject: [PATCH 5/6] update --- ddtrace/internal/packages.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ddtrace/internal/packages.py b/ddtrace/internal/packages.py index 969005d8961..7f0a37ae87b 100644 --- a/ddtrace/internal/packages.py +++ b/ddtrace/internal/packages.py @@ -70,17 +70,17 @@ def get_module_distribution_versions(module_name: str) -> t.Optional[t.Tuple[str dist_map = get_distributions() while names == []: # First try to resolve the module name from package distributions + version = dist_map.get(module_name) + if version: + return (module_name, version) + # Since we've failed to resolve, try to resolve the parent package names = pkgs.get(module_name, []) if not names: - # try to resolve the parent package p = module_name.rfind(".") if p > 0: module_name = module_name[:p] else: break - version = dist_map.get(module_name) - if version: - return (module_name, version) if len(names) != 1: # either it was not resolved due to multiple packages with the same name # or it's a multipurpose package (like '__pycache__') From 58ec39a8a50c1acc72b9afa9bfadf0c977fd8f52 Mon Sep 17 00:00:00 2001 From: Taegyun Kim Date: Fri, 25 Apr 2025 15:28:51 -0400 Subject: [PATCH 6/6] reno --- releasenotes/notes/perf-telemetry-d9881d20f22013f7.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 releasenotes/notes/perf-telemetry-d9881d20f22013f7.yaml diff --git a/releasenotes/notes/perf-telemetry-d9881d20f22013f7.yaml b/releasenotes/notes/perf-telemetry-d9881d20f22013f7.yaml new file mode 100644 index 00000000000..d946f8990c8 --- /dev/null +++ b/releasenotes/notes/perf-telemetry-d9881d20f22013f7.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + telemetry: improves periodic telemetry writer performance by removing + unnecessary calls to ``importlib.metadata`` for reporting imported dependencies. +