diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index ce435de36b..8660086feb 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -38,6 +38,7 @@ class CompressionAlgo(Enum): from typing import Any from typing import Sequence from typing import Tuple + from typing_extensions import Literal from typing_extensions import TypedDict from sentry_sdk._types import ( @@ -528,6 +529,7 @@ def __init__( profiles_sample_rate=None, # type: Optional[float] profiles_sampler=None, # type: Optional[TracesSampler] profiler_mode=None, # type: Optional[ProfilerMode] + profile_lifecycle="manual", # type: Literal["manual", "trace"] profile_session_sample_rate=None, # type: Optional[float] auto_enabling_integrations=True, # type: bool disabled_integrations=None, # type: Optional[Sequence[sentry_sdk.integrations.Integration]] diff --git a/sentry_sdk/profiler/continuous_profiler.py b/sentry_sdk/profiler/continuous_profiler.py index b07fbec998..1619925bd2 100644 --- a/sentry_sdk/profiler/continuous_profiler.py +++ b/sentry_sdk/profiler/continuous_profiler.py @@ -5,6 +5,7 @@ import threading import time import uuid +from collections import deque from datetime import datetime, timezone from sentry_sdk.consts import VERSION @@ -27,9 +28,11 @@ if TYPE_CHECKING: from typing import Any from typing import Callable + from typing import Deque from typing import Dict from typing import List from typing import Optional + from typing import Set from typing import Type from typing import Union from typing_extensions import TypedDict @@ -120,6 +123,9 @@ def setup_continuous_profiler(options, sdk_info, capture_func): def try_autostart_continuous_profiler(): # type: () -> None + + # TODO: deprecate this as it'll be replaced by the auto lifecycle option + if _scheduler is None: return @@ -129,6 +135,14 @@ def try_autostart_continuous_profiler(): _scheduler.manual_start() +def try_profile_lifecycle_trace_start(): + # type: () -> Union[ContinuousProfile, None] + if _scheduler is None: + return None + + return _scheduler.auto_start() + + def start_profiler(): # type: () -> None if _scheduler is None: @@ -170,6 +184,14 @@ def determine_profile_session_sampling_decision(sample_rate): return random.random() < float(sample_rate) +class ContinuousProfile: + active: bool = True + + def stop(self): + # type: () -> None + self.active = False + + class ContinuousScheduler: mode = "unknown" # type: ContinuousProfilerMode @@ -179,16 +201,21 @@ def __init__(self, frequency, options, sdk_info, capture_func): self.options = options self.sdk_info = sdk_info self.capture_func = capture_func + + self.lifecycle = self.options.get("profile_lifecycle") + profile_session_sample_rate = self.options.get("profile_session_sample_rate") + self.sampled = determine_profile_session_sampling_decision( + profile_session_sample_rate + ) + self.sampler = self.make_sampler() self.buffer = None # type: Optional[ProfileBuffer] self.pid = None # type: Optional[int] self.running = False - profile_session_sample_rate = self.options.get("profile_session_sample_rate") - self.sampled = determine_profile_session_sampling_decision( - profile_session_sample_rate - ) + self.new_profiles = deque(maxlen=128) # type: Deque[ContinuousProfile] + self.active_profiles = set() # type: Set[ContinuousProfile] def is_auto_start_enabled(self): # type: () -> bool @@ -207,15 +234,38 @@ def is_auto_start_enabled(self): return experiments.get("continuous_profiling_auto_start") + def auto_start(self): + # type: () -> Union[ContinuousProfile, None] + if not self.sampled: + return None + + if self.lifecycle != "trace": + return None + + logger.debug("[Profiling] Auto starting profiler") + + profile = ContinuousProfile() + + self.new_profiles.append(profile) + self.ensure_running() + + return profile + def manual_start(self): # type: () -> None if not self.sampled: return + if self.lifecycle != "manual": + return + self.ensure_running() def manual_stop(self): # type: () -> None + if self.lifecycle != "manual": + return + self.teardown() def ensure_running(self): @@ -249,28 +299,97 @@ def make_sampler(self): cache = LRUCache(max_size=256) - def _sample_stack(*args, **kwargs): - # type: (*Any, **Any) -> None - """ - Take a sample of the stack on all the threads in the process. - This should be called at a regular interval to collect samples. - """ - - ts = now() - - try: - sample = [ - (str(tid), extract_stack(frame, cache, cwd)) - for tid, frame in sys._current_frames().items() - ] - except AttributeError: - # For some reason, the frame we get doesn't have certain attributes. - # When this happens, we abandon the current sample as it's bad. - capture_internal_exception(sys.exc_info()) - return - - if self.buffer is not None: - self.buffer.write(ts, sample) + if self.lifecycle == "trace": + + def _sample_stack(*args, **kwargs): + # type: (*Any, **Any) -> None + """ + Take a sample of the stack on all the threads in the process. + This should be called at a regular interval to collect samples. + """ + + # no profiles taking place, so we can stop early + if not self.new_profiles and not self.active_profiles: + self.running = False + return + + # This is the number of profiles we want to pop off. + # It's possible another thread adds a new profile to + # the list and we spend longer than we want inside + # the loop below. + # + # Also make sure to set this value before extracting + # frames so we do not write to any new profiles that + # were started after this point. + new_profiles = len(self.new_profiles) + + ts = now() + + try: + sample = [ + (str(tid), extract_stack(frame, cache, cwd)) + for tid, frame in sys._current_frames().items() + ] + except AttributeError: + # For some reason, the frame we get doesn't have certain attributes. + # When this happens, we abandon the current sample as it's bad. + capture_internal_exception(sys.exc_info()) + return + + # Move the new profiles into the active_profiles set. + # + # We cannot directly add the to active_profiles set + # in `start_profiling` because it is called from other + # threads which can cause a RuntimeError when it the + # set sizes changes during iteration without a lock. + # + # We also want to avoid using a lock here so threads + # that are starting profiles are not blocked until it + # can acquire the lock. + for _ in range(new_profiles): + self.active_profiles.add(self.new_profiles.popleft()) + inactive_profiles = [] + + for profile in self.active_profiles: + if profile.active: + pass + else: + # If a profile is marked inactive, we buffer it + # to `inactive_profiles` so it can be removed. + # We cannot remove it here as it would result + # in a RuntimeError. + inactive_profiles.append(profile) + + for profile in inactive_profiles: + self.active_profiles.remove(profile) + + if self.buffer is not None: + self.buffer.write(ts, sample) + + else: + + def _sample_stack(*args, **kwargs): + # type: (*Any, **Any) -> None + """ + Take a sample of the stack on all the threads in the process. + This should be called at a regular interval to collect samples. + """ + + ts = now() + + try: + sample = [ + (str(tid), extract_stack(frame, cache, cwd)) + for tid, frame in sys._current_frames().items() + ] + except AttributeError: + # For some reason, the frame we get doesn't have certain attributes. + # When this happens, we abandon the current sample as it's bad. + capture_internal_exception(sys.exc_info()) + return + + if self.buffer is not None: + self.buffer.write(ts, sample) return _sample_stack @@ -294,6 +413,7 @@ def run(self): if self.buffer is not None: self.buffer.flush() + self.buffer = None class ThreadContinuousScheduler(ContinuousScheduler): diff --git a/sentry_sdk/profiler/transaction_profiler.py b/sentry_sdk/profiler/transaction_profiler.py index f579c441fa..3743b7c905 100644 --- a/sentry_sdk/profiler/transaction_profiler.py +++ b/sentry_sdk/profiler/transaction_profiler.py @@ -644,7 +644,7 @@ def _sample_stack(*args, **kwargs): if profile.active: profile.write(now, sample) else: - # If a thread is marked inactive, we buffer it + # If a profile is marked inactive, we buffer it # to `inactive_profiles` so it can be removed. # We cannot remove it here as it would result # in a RuntimeError. diff --git a/sentry_sdk/scope.py b/sentry_sdk/scope.py index c22cdfb030..9af9769ee5 100644 --- a/sentry_sdk/scope.py +++ b/sentry_sdk/scope.py @@ -12,7 +12,11 @@ from sentry_sdk.attachments import Attachment from sentry_sdk.consts import DEFAULT_MAX_BREADCRUMBS, FALSE_VALUES, INSTRUMENTER from sentry_sdk.feature_flags import FlagBuffer, DEFAULT_FLAG_CAPACITY -from sentry_sdk.profiler.continuous_profiler import try_autostart_continuous_profiler +from sentry_sdk.profiler.continuous_profiler import ( + get_profiler_id, + try_autostart_continuous_profiler, + try_profile_lifecycle_trace_start, +) from sentry_sdk.profiler.transaction_profiler import Profile from sentry_sdk.session import Session from sentry_sdk.tracing_utils import ( @@ -1051,6 +1055,14 @@ def start_transaction( transaction._profile = profile + transaction._continuous_profile = try_profile_lifecycle_trace_start() + + # Typically, the profiler is set when the transaction is created. But when + # using the auto lifecycle, the profiler isn't running when the first + # transaction is started. So make sure we update the profiler id on it. + if transaction._continuous_profile is not None: + transaction.set_profiler_id(get_profiler_id()) + # we don't bother to keep spans if we already know we're not going to # send the transaction max_spans = (client.options["_experiments"].get("max_spans")) or 1000 diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py index 59473d752c..b7dc437506 100644 --- a/sentry_sdk/tracing.py +++ b/sentry_sdk/tracing.py @@ -34,7 +34,8 @@ P = ParamSpec("P") R = TypeVar("R") - import sentry_sdk.profiler + from sentry_sdk.profiler.continuous_profiler import ContinuousProfile + from sentry_sdk.profiler.transaction_profiler import Profile from sentry_sdk._types import ( Event, MeasurementUnit, @@ -767,6 +768,7 @@ class Transaction(Span): "_measurements", "_contexts", "_profile", + "_continuous_profile", "_baggage", ) @@ -788,9 +790,8 @@ def __init__( # type: ignore[misc] self.parent_sampled = parent_sampled self._measurements = {} # type: Dict[str, MeasurementValue] self._contexts = {} # type: Dict[str, Any] - self._profile = ( - None - ) # type: Optional[sentry_sdk.profiler.transaction_profiler.Profile] + self._profile = None # type: Optional[Profile] + self._continuous_profile = None # type: Optional[ContinuousProfile] self._baggage = baggage def __repr__(self): @@ -843,6 +844,9 @@ def __exit__(self, ty, value, tb): if self._profile is not None: self._profile.__exit__(ty, value, tb) + if self._continuous_profile is not None: + self._continuous_profile.stop() + super().__exit__(ty, value, tb) @property diff --git a/tests/profiler/test_continuous_profiler.py b/tests/profiler/test_continuous_profiler.py index 6f4893e59d..331080df83 100644 --- a/tests/profiler/test_continuous_profiler.py +++ b/tests/profiler/test_continuous_profiler.py @@ -8,6 +8,7 @@ import sentry_sdk from sentry_sdk.consts import VERSION from sentry_sdk.profiler.continuous_profiler import ( + get_profiler_id, setup_continuous_profiler, start_profiler, stop_profiler, @@ -24,9 +25,12 @@ def get_client_options(use_top_level_profiler_mode): - def client_options(mode=None, auto_start=None, profile_session_sample_rate=1.0): + def client_options( + mode=None, auto_start=None, profile_session_sample_rate=1.0, lifecycle="manual" + ): if use_top_level_profiler_mode: return { + "profile_lifecycle": lifecycle, "profiler_mode": mode, "profile_session_sample_rate": profile_session_sample_rate, "_experiments": { @@ -34,6 +38,7 @@ def client_options(mode=None, auto_start=None, profile_session_sample_rate=1.0): }, } return { + "profile_lifecycle": lifecycle, "profile_session_sample_rate": profile_session_sample_rate, "_experiments": { "continuous_profiling_auto_start": auto_start, @@ -121,14 +126,17 @@ def test_continuous_profiler_setup_twice(mode, make_options, teardown_profiling) ) -def assert_single_transaction_with_profile_chunks(envelopes, thread): +def assert_single_transaction_with_profile_chunks( + envelopes, thread, max_chunks, transactions=1 +): items = defaultdict(list) for envelope in envelopes: for item in envelope.items: items[item.type].append(item) - assert len(items["transaction"]) == 1 + assert len(items["transaction"]) == transactions assert len(items["profile_chunk"]) > 0 + assert len(items["profile_chunk"]) <= max_chunks transaction = items["transaction"][0].payload.json @@ -163,6 +171,7 @@ def assert_single_transaction_with_profile_chunks(envelopes, thread): for profile_chunk_item in items["profile_chunk"]: profile_chunk = profile_chunk_item.payload.json + del profile_chunk["profile"] # make the diff easier to read assert profile_chunk == ApproxDict( { "client_sdk": { @@ -224,9 +233,9 @@ def test_continuous_profiler_auto_start_and_manual_stop( with sentry_sdk.start_transaction(name="profiling"): with sentry_sdk.start_span(op="op"): - time.sleep(0.1) + time.sleep(0.05) - assert_single_transaction_with_profile_chunks(envelopes, thread) + assert_single_transaction_with_profile_chunks(envelopes, thread, max_chunks=10) for _ in range(3): stop_profiler() @@ -235,7 +244,7 @@ def test_continuous_profiler_auto_start_and_manual_stop( with sentry_sdk.start_transaction(name="profiling"): with sentry_sdk.start_span(op="op"): - time.sleep(0.1) + time.sleep(0.05) assert_single_transaction_without_profile_chunks(envelopes) @@ -245,9 +254,9 @@ def test_continuous_profiler_auto_start_and_manual_stop( with sentry_sdk.start_transaction(name="profiling"): with sentry_sdk.start_span(op="op"): - time.sleep(0.1) + time.sleep(0.05) - assert_single_transaction_with_profile_chunks(envelopes, thread) + assert_single_transaction_with_profile_chunks(envelopes, thread, max_chunks=10) @pytest.mark.parametrize( @@ -272,7 +281,9 @@ def test_continuous_profiler_manual_start_and_stop_sampled( make_options, teardown_profiling, ): - options = make_options(mode=mode) + options = make_options( + mode=mode, profile_session_sample_rate=1.0, lifecycle="manual" + ) sentry_init( traces_sample_rate=1.0, **options, @@ -291,7 +302,7 @@ def test_continuous_profiler_manual_start_and_stop_sampled( with sentry_sdk.start_span(op="op"): time.sleep(0.05) - assert_single_transaction_with_profile_chunks(envelopes, thread) + assert_single_transaction_with_profile_chunks(envelopes, thread, max_chunks=10) stop_profiler() @@ -325,7 +336,9 @@ def test_continuous_profiler_manual_start_and_stop_unsampled( make_options, teardown_profiling, ): - options = make_options(mode=mode, profile_session_sample_rate=0.0) + options = make_options( + mode=mode, profile_session_sample_rate=0.0, lifecycle="manual" + ) sentry_init( traces_sample_rate=1.0, **options, @@ -342,3 +355,156 @@ def test_continuous_profiler_manual_start_and_stop_unsampled( assert_single_transaction_without_profile_chunks(envelopes) stop_profiler() + + +@pytest.mark.parametrize( + "mode", + [ + pytest.param("thread"), + pytest.param("gevent", marks=requires_gevent), + ], +) +@pytest.mark.parametrize( + "make_options", + [ + pytest.param(get_client_options(True), id="non-experiment"), + pytest.param(get_client_options(False), id="experiment"), + ], +) +@mock.patch("sentry_sdk.profiler.continuous_profiler.DEFAULT_SAMPLING_FREQUENCY", 21) +def test_continuous_profiler_auto_start_and_stop_sampled( + sentry_init, + capture_envelopes, + mode, + make_options, + teardown_profiling, +): + options = make_options( + mode=mode, profile_session_sample_rate=1.0, lifecycle="trace" + ) + sentry_init( + traces_sample_rate=1.0, + **options, + ) + + envelopes = capture_envelopes() + + thread = threading.current_thread() + + for _ in range(3): + envelopes.clear() + + with sentry_sdk.start_transaction(name="profiling 1"): + assert get_profiler_id() is not None, "profiler should be running" + with sentry_sdk.start_span(op="op"): + time.sleep(0.03) + assert get_profiler_id() is not None, "profiler should be running" + + # the profiler takes a while to stop so if we start a transaction + # immediately, it'll be part of the same chunk + assert get_profiler_id() is not None, "profiler should be running" + + with sentry_sdk.start_transaction(name="profiling 2"): + assert get_profiler_id() is not None, "profiler should be running" + with sentry_sdk.start_span(op="op"): + time.sleep(0.03) + assert get_profiler_id() is not None, "profiler should be running" + + # wait at least 1 cycle for the profiler to stop + time.sleep(0.2) + assert get_profiler_id() is None, "profiler should not be running" + + assert_single_transaction_with_profile_chunks( + envelopes, thread, max_chunks=1, transactions=2 + ) + + +@pytest.mark.parametrize( + "mode", + [ + pytest.param("thread"), + pytest.param("gevent", marks=requires_gevent), + ], +) +@pytest.mark.parametrize( + "make_options", + [ + pytest.param(get_client_options(True), id="non-experiment"), + pytest.param(get_client_options(False), id="experiment"), + ], +) +@mock.patch("sentry_sdk.profiler.continuous_profiler.PROFILE_BUFFER_SECONDS", 0.01) +def test_continuous_profiler_auto_start_and_stop_unsampled( + sentry_init, + capture_envelopes, + mode, + make_options, + teardown_profiling, +): + options = make_options( + mode=mode, profile_session_sample_rate=0.0, lifecycle="trace" + ) + sentry_init( + traces_sample_rate=1.0, + **options, + ) + + envelopes = capture_envelopes() + + for _ in range(3): + envelopes.clear() + + with sentry_sdk.start_transaction(name="profiling"): + assert get_profiler_id() is None, "profiler should not be running" + with sentry_sdk.start_span(op="op"): + time.sleep(0.05) + assert get_profiler_id() is None, "profiler should not be running" + + assert get_profiler_id() is None, "profiler should not be running" + assert_single_transaction_without_profile_chunks(envelopes) + + +@pytest.mark.parametrize( + ["mode", "class_name"], + [ + pytest.param("thread", "ThreadContinuousScheduler"), + pytest.param( + "gevent", + "GeventContinuousScheduler", + marks=requires_gevent, + ), + ], +) +@pytest.mark.parametrize( + "make_options", + [ + pytest.param(get_client_options(True), id="non-experiment"), + pytest.param(get_client_options(False), id="experiment"), + ], +) +def test_continuous_profiler_manual_start_and_stop_noop_when_using_trace_lifecyle( + sentry_init, + mode, + class_name, + make_options, + teardown_profiling, +): + options = make_options( + mode=mode, profile_session_sample_rate=0.0, lifecycle="trace" + ) + sentry_init( + traces_sample_rate=1.0, + **options, + ) + + with mock.patch( + f"sentry_sdk.profiler.continuous_profiler.{class_name}.ensure_running" + ) as mock_ensure_running: + start_profiler() + mock_ensure_running.assert_not_called() + + with mock.patch( + f"sentry_sdk.profiler.continuous_profiler.{class_name}.teardown" + ) as mock_teardown: + stop_profiler() + mock_teardown.assert_not_called()