feat(tracing): Use sample_rand for sampling decisions

szokeasaurusrex · szokeasaurusrex · commit c3d6d12912fb · 2025-02-12T12:39:54.000+01:00
Use the `sample_rand` value carried in an incoming trace's baggage to make sampling decisions, rather than generating a fresh random value. When we are the head SDK starting a new trace, save our randomly generated value as the `sample_rand` so that it is propagated in the baggage, and change the random generation logic so that the `sample_rand` is computed deterministically from the `trace_id` (a random generator seeded with the trace ID). Closes #3998
diff --git a/sentry_sdk/tracing.py b/sentry_sdk/tracing.py
@@ -1,7 +1,7 @@
 import uuid
-import random
 import warnings
 from datetime import datetime, timedelta, timezone
+from random import Random
 
 import sentry_sdk
 from sentry_sdk.consts import INSTRUMENTER, SPANSTATUS, SPANDATA
@@ -774,6 +774,7 @@ class Transaction(Span):
         "_contexts",
         "_profile",
         "_baggage",
+        "_sample_rand",
     )
 
     def __init__(  # type: ignore[misc]
@@ -799,6 +800,14 @@ def __init__(  # type: ignore[misc]
         )  # type: Optional[sentry_sdk.profiler.transaction_profiler.Profile]
         self._baggage = baggage
 
+        baggage_sample_rand = (
+            None if self._baggage is None else self._baggage._sample_rand()
+        )
+        if baggage_sample_rand is not None:
+            self._sample_rand = baggage_sample_rand
+        else:
+            self._sample_rand = Random(self.trace_id).random()
+
     def __repr__(self):
         # type: () -> str
         return (
@@ -1167,10 +1176,10 @@ def _set_initial_sampling_decision(self, sampling_context):
             self.sampled = False
             return
 
-        # Now we roll the dice. random.random is inclusive of 0, but not of 1,
+        # Now we roll the dice. self._sample_rand is inclusive of 0, but not of 1,
         # so strict < is safe here. In case sample_rate is a boolean, cast it
         # to a float (True becomes 1.0 and False becomes 0.0)
-        self.sampled = random.random() < self.sample_rate
+        self.sampled = self._sample_rand < self.sample_rate
 
         if self.sampled:
             logger.debug(
diff --git a/sentry_sdk/tracing_utils.py b/sentry_sdk/tracing_utils.py
@@ -630,6 +630,7 @@ def populate_from_transaction(cls, transaction):
         options = client.options or {}
 
         sentry_items["trace_id"] = transaction.trace_id
+        sentry_items["sample_rand"] = str(transaction._sample_rand)
 
         if options.get("environment"):
             sentry_items["environment"] = options["environment"]
@@ -702,6 +703,20 @@ def strip_sentry_baggage(header):
             )
         )
 
+    def _sample_rand(self):
+        # type: () -> Optional[float]
+        """Return the ``sample_rand`` stored in ``sentry_items`` as a float, or None.
+
+        The raw item is passed through ``_try_float`` and is returned only when the result is
+        a number in the half-open range [0, 1); a None result or an out-of-range number yields None.
+        """
+        sample_rand = _try_float(self.sentry_items.get("sample_rand"))  # a missing key is passed on as None
+
+        if sample_rand is not None and 0 <= sample_rand < 1:  # 1.0 itself is rejected
+            return sample_rand
+
+        return None
+
 
 def should_propagate_trace(client, url):
     # type: (sentry_sdk.client.BaseClient, str) -> bool
diff --git a/tests/tracing/test_sample_rand.py b/tests/tracing/test_sample_rand.py
@@ -0,0 +1,63 @@
+import pytest
+
+import sentry_sdk
+from sentry_sdk.tracing_utils import Baggage
+
+TEST_TRACE_ID_SAMPLE_RANDS = {  # 32-hex-character trace ID -> expected sample_rand
+    "00000000000000000000000000000000": 0.8766381713144122,
+    "01234567012345670123456701234567": 0.6451742521664413,
+    "0123456789abcdef0123456789abcdef": 0.9338861957669223,
+}
+"""
+A dictionary of some trace IDs used in the tests, and their precomputed sample_rand values.
+
+sample_rand values are pseudo-random numbers, deterministically generated from the trace ID.
+"""
+
+
+@pytest.mark.parametrize(
+    ("trace_id", "expected_sample_rand"),
+    TEST_TRACE_ID_SAMPLE_RANDS.items(),
+)
+# sample_rate takes the 21 evenly spaced values 0.0, 0.05, ..., 1.0 (both ends included)
+@pytest.mark.parametrize("sample_rate", (i / 20 for i in range(21)))
+def test_deterministic_sampled(
+    sentry_init, capture_events, sample_rate, trace_id, expected_sample_rand
+):
+    """
+    For each known trace ID, check that a transaction started with that ID carries the
+    precomputed sample_rand (as a string) in its baggage, and that exactly one event is
+    captured when sample_rand < sample_rate and none otherwise.
+    """
+    sentry_init(traces_sample_rate=sample_rate)
+    events = capture_events()
+
+    with sentry_sdk.start_transaction(trace_id=trace_id) as transaction:
+        assert transaction.get_baggage().sentry_items["sample_rand"] == str(
+            expected_sample_rand
+        )
+
+    # The event count must be 1 when sample_rand < sample_rate and 0 otherwise, which
+    # shows that sample_rand (not a fresh random draw) drives the sampling decision.
+    assert len(events) == int(expected_sample_rand < sample_rate)
+
+
+@pytest.mark.parametrize("sample_rand", (0.0, 0.2, 0.4, 0.6, 0.8))
+@pytest.mark.parametrize("sample_rate", (0.0, 0.2, 0.4, 0.6, 0.8, 1.0))
+def test_transaction_uses_incoming_sample_rand(
+    sentry_init, capture_events, sample_rate, sample_rand
+):
+    """
+    A transaction started from incoming baggage keeps that baggage's sample_rand and samples with it.
+    """
+    baggage = Baggage(sentry_items={"sample_rand": str(sample_rand)})  # stands in for incoming baggage
+
+    sentry_init(traces_sample_rate=sample_rate)
+    events = capture_events()
+
+    with sentry_sdk.start_transaction(baggage=baggage) as transaction:
+        assert transaction.get_baggage().sentry_items["sample_rand"] == str(sample_rand)
+
+    # One event is expected when sample_rand < sample_rate and none otherwise; the grid
+    # includes equal pairs (e.g. 0.2 and 0.2), which must not be sampled (strict <).
+    assert len(events) == int(sample_rand < sample_rate)