From aaa78c42528274ba51b4adb9bc1c080c2bbb0705 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 5 Jun 2025 13:27:53 +0200 Subject: [PATCH 1/9] store a hash of the exc instead of full exc --- sentry_sdk/integrations/dedupe.py | 34 +++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/integrations/dedupe.py b/sentry_sdk/integrations/dedupe.py index a115e35292..84e2182359 100644 --- a/sentry_sdk/integrations/dedupe.py +++ b/sentry_sdk/integrations/dedupe.py @@ -18,6 +18,33 @@ def __init__(self): # type: () -> None self._last_seen = ContextVar("last-seen") + @staticmethod + def _get_exception_hash(exc): + # type: (Exception) -> int + """ + Create a memory-efficient hash for an exception. + + Instead of storing the entire exception object, we store just enough + information to identify it uniquely. This avoids keeping the traceback + and local variables in memory. + """ + # Get the exception type name and message + exc_type = type(exc).__name__ + exc_message = str(exc) + + # Get the first frame of the traceback if it exists + if hasattr(exc, "__traceback__") and exc.__traceback__: + frame = exc.__traceback__.tb_frame + filename = frame.f_code.co_filename + lineno = frame.f_lineno + func_name = frame.f_code.co_name + location = f"{filename}:{lineno}:{func_name}" # noqa: E231 + else: + location = None + + # Create a tuple of the essential information and hash it + return hash((exc_type, exc_message, location)) + @staticmethod def setup_once(): # type: () -> None @@ -36,9 +63,12 @@ def processor(event, hint): return event exc = exc_info[1] - if integration._last_seen.get(None) is exc: + exc_hash = DedupeIntegration._get_exception_hash(exc) + + if integration._last_seen.get(None) == exc_hash: return None - integration._last_seen.set(exc) + + integration._last_seen.set(exc_hash) return event @staticmethod From 549fae8578c5b9a9fa5a2ea3660c664cdfaf5dd4 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 5 Jun 2025 13:38:36 +0200 Subject: [PATCH 2/9] getting the full stacktrace --- sentry_sdk/integrations/dedupe.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/sentry_sdk/integrations/dedupe.py b/sentry_sdk/integrations/dedupe.py index 84e2182359..2f643dfdcd 100644 --- a/sentry_sdk/integrations/dedupe.py +++ b/sentry_sdk/integrations/dedupe.py @@ -32,18 +32,20 @@ def _get_exception_hash(exc): exc_type = type(exc).__name__ exc_message = str(exc) - # Get the first frame of the traceback if it exists + # Get the full stacktrace + stacktrace = [] if hasattr(exc, "__traceback__") and exc.__traceback__: - frame = exc.__traceback__.tb_frame - filename = frame.f_code.co_filename - lineno = frame.f_lineno - func_name = frame.f_code.co_name - location = f"{filename}:{lineno}:{func_name}" # noqa: E231 - else: - location = None + tb = exc.__traceback__ + while tb: + frame = tb.tb_frame + filename = frame.f_code.co_filename + lineno = tb.tb_lineno + func_name = frame.f_code.co_name + stacktrace.append((filename, lineno, func_name)) + tb = tb.tb_next # Create a tuple of the essential information and hash it - return hash((exc_type, exc_message, location)) + return hash((exc_type, exc_message, tuple(stacktrace))) @staticmethod def setup_once(): From 3d66381f135569e2c6a57d8f5a4667579905b99f Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 5 Jun 2025 14:43:27 +0200 Subject: [PATCH 3/9] trying with an hash of the event stacktrace --- sentry_sdk/integrations/dedupe.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sentry_sdk/integrations/dedupe.py b/sentry_sdk/integrations/dedupe.py index 2f643dfdcd..c7d2d56f25 100644 --- a/sentry_sdk/integrations/dedupe.py +++ b/sentry_sdk/integrations/dedupe.py @@ -1,5 +1,5 @@ import sentry_sdk -from sentry_sdk.utils import ContextVar +from sentry_sdk.utils import ContextVar, iter_event_stacktraces from sentry_sdk.integrations import Integration from sentry_sdk.scope import add_global_event_processor @@ -64,13 +64,11 @@ def processor(event, hint): if exc_info is None: return event - exc = exc_info[1] - exc_hash = DedupeIntegration._get_exception_hash(exc) - - if integration._last_seen.get(None) == exc_hash: + event_hash = hash(iter_event_stacktraces(event)) + if integration._last_seen.get(None) == event_hash: return None - integration._last_seen.set(exc_hash) + integration._last_seen.set(event_hash) return event @staticmethod From c8b8be4a1fab0fc05756c098849122078644aac4 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 5 Jun 2025 15:53:46 +0200 Subject: [PATCH 4/9] Hashing just the important parts of the event --- sentry_sdk/integrations/dedupe.py | 39 +++++++++++++------------------ 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/sentry_sdk/integrations/dedupe.py b/sentry_sdk/integrations/dedupe.py index c7d2d56f25..4568528f64 100644 --- a/sentry_sdk/integrations/dedupe.py +++ b/sentry_sdk/integrations/dedupe.py @@ -1,5 +1,5 @@ import sentry_sdk -from sentry_sdk.utils import ContextVar, iter_event_stacktraces +from sentry_sdk.utils import ContextVar from sentry_sdk.integrations import Integration from sentry_sdk.scope import add_global_event_processor @@ -19,33 +19,26 @@ def __init__(self): self._last_seen = ContextVar("last-seen") @staticmethod - def _get_exception_hash(exc): - # type: (Exception) -> int + def _get_event_hash(event): + # type: (Event) -> int """ - Create a memory-efficient hash for an exception. + Create a memory-efficient hash for an event. Instead of storing the entire exception object, we store just enough information to identify it uniquely. This avoids keeping the traceback and local variables in memory. """ - # Get the exception type name and message - exc_type = type(exc).__name__ - exc_message = str(exc) - - # Get the full stacktrace - stacktrace = [] - if hasattr(exc, "__traceback__") and exc.__traceback__: - tb = exc.__traceback__ - while tb: - frame = tb.tb_frame - filename = frame.f_code.co_filename - lineno = tb.tb_lineno - func_name = frame.f_code.co_name - stacktrace.append((filename, lineno, func_name)) - tb = tb.tb_next - - # Create a tuple of the essential information and hash it - return hash((exc_type, exc_message, tuple(stacktrace))) + event_hash = hash( + ( + event["exception"]["values"][0]["type"], + event["exception"]["values"][0]["value"], + event["exception"]["values"][0]["stacktrace"]["frames"][-1]["filename"], + event["exception"]["values"][0]["stacktrace"]["frames"][-1]["function"], + event["exception"]["values"][0]["stacktrace"]["frames"][-1]["lineno"], + ) + ) + + return event_hash @staticmethod def setup_once(): @@ -64,7 +57,7 @@ def processor(event, hint): if exc_info is None: return event - event_hash = hash(iter_event_stacktraces(event)) + event_hash = DedupeIntegration._get_event_hash(event) if integration._last_seen.get(None) == event_hash: return None From b815c2c0efb89886b9ca740dd8b6e93b0490e70e Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 5 Jun 2025 15:58:39 +0200 Subject: [PATCH 5/9] resilience --- sentry_sdk/integrations/dedupe.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/dedupe.py b/sentry_sdk/integrations/dedupe.py index 4568528f64..64d788e5bb 100644 --- a/sentry_sdk/integrations/dedupe.py +++ b/sentry_sdk/integrations/dedupe.py @@ -28,13 +28,18 @@ def _get_event_hash(event): information to identify it uniquely. This avoids keeping the traceback and local variables in memory. """ + try: + stacktrace = event["exception"]["values"][0].get("stacktrace") + except KeyError: + stacktrace = None + event_hash = hash( ( event["exception"]["values"][0]["type"], event["exception"]["values"][0]["value"], - event["exception"]["values"][0]["stacktrace"]["frames"][-1]["filename"], - event["exception"]["values"][0]["stacktrace"]["frames"][-1]["function"], - event["exception"]["values"][0]["stacktrace"]["frames"][-1]["lineno"], + stacktrace["frames"][-1]["filename"] if stacktrace else None, + stacktrace["frames"][-1]["function"] if stacktrace else None, + stacktrace["frames"][-1]["lineno"] if stacktrace else None, ) ) From 480c2d75380e2c6b3373dd605c9b4a138976b1cc Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 5 Jun 2025 16:10:14 +0200 Subject: [PATCH 6/9] using id() --- sentry_sdk/integrations/dedupe.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/dedupe.py b/sentry_sdk/integrations/dedupe.py index 64d788e5bb..46153e9900 100644 --- a/sentry_sdk/integrations/dedupe.py +++ b/sentry_sdk/integrations/dedupe.py @@ -62,11 +62,11 @@ def processor(event, hint): if exc_info is None: return event - event_hash = DedupeIntegration._get_event_hash(event) - if integration._last_seen.get(None) == event_hash: + exc = exc_info[1] + if integration._last_seen.get(None) == id(exc): return None - integration._last_seen.set(event_hash) + integration._last_seen.set(id(exc)) return event @staticmethod From cd0985a85cae800327c197a6eedb5f11ccfbd607 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 5 Jun 2025 16:33:26 +0200 Subject: [PATCH 7/9] cleanup --- sentry_sdk/integrations/dedupe.py | 27 --------------------------- tests/test_basics.py | 8 +++++++- 2 files changed, 7 insertions(+), 28 deletions(-) diff --git a/sentry_sdk/integrations/dedupe.py b/sentry_sdk/integrations/dedupe.py index 46153e9900..838bcd3eca 100644 --- a/sentry_sdk/integrations/dedupe.py +++ b/sentry_sdk/integrations/dedupe.py @@ -18,33 +18,6 @@ def __init__(self): # type: () -> None self._last_seen = ContextVar("last-seen") - @staticmethod - def _get_event_hash(event): - # type: (Event) -> int - """ - Create a memory-efficient hash for an event. - - Instead of storing the entire exception object, we store just enough - information to identify it uniquely. This avoids keeping the traceback - and local variables in memory. - """ - try: - stacktrace = event["exception"]["values"][0].get("stacktrace") - except KeyError: - stacktrace = None - - event_hash = hash( - ( - event["exception"]["values"][0]["type"], - event["exception"]["values"][0]["value"], - stacktrace["frames"][-1]["filename"] if stacktrace else None, - stacktrace["frames"][-1]["function"] if stacktrace else None, - stacktrace["frames"][-1]["lineno"] if stacktrace else None, - ) - ) - - return event_hash - @staticmethod def setup_once(): # type: () -> None diff --git a/tests/test_basics.py b/tests/test_basics.py index 2eeba78216..463f20da88 100644 --- a/tests/test_basics.py +++ b/tests/test_basics.py @@ -220,7 +220,13 @@ def before_breadcrumb(crumb, hint): crumb["data"] = {"foo": "bar"} return crumb - sentry_init(before_send=before_send, before_breadcrumb=before_breadcrumb) + from sentry_sdk.integrations.dedupe import DedupeIntegration + + sentry_init( + before_send=before_send, + before_breadcrumb=before_breadcrumb, + disabled_integrations=[DedupeIntegration], + ) events = capture_events() monkeypatch.setattr( From d328fdadfc660fccc9d83b50faaade1f658a8005 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 5 Jun 2025 16:33:57 +0200 Subject: [PATCH 8/9] better readability --- sentry_sdk/integrations/dedupe.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/dedupe.py b/sentry_sdk/integrations/dedupe.py index 838bcd3eca..d797f66a28 100644 --- a/sentry_sdk/integrations/dedupe.py +++ b/sentry_sdk/integrations/dedupe.py @@ -35,11 +35,11 @@ def processor(event, hint): if exc_info is None: return event - exc = exc_info[1] - if integration._last_seen.get(None) == id(exc): + exc_id = id(exc_info[1]) + if integration._last_seen.get(None) == exc_id: return None - integration._last_seen.set(id(exc)) + integration._last_seen.set(exc_id) return event @staticmethod From 07ca26669549a14b35fec3928d1e431e74a033af Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Fri, 6 Jun 2025 09:44:53 +0200 Subject: [PATCH 9/9] removed changes made to test. --- tests/test_basics.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/tests/test_basics.py b/tests/test_basics.py index 463f20da88..2eeba78216 100644 --- a/tests/test_basics.py +++ b/tests/test_basics.py @@ -220,13 +220,7 @@ def before_breadcrumb(crumb, hint): crumb["data"] = {"foo": "bar"} return crumb - from sentry_sdk.integrations.dedupe import DedupeIntegration - - sentry_init( - before_send=before_send, - before_breadcrumb=before_breadcrumb, - disabled_integrations=[DedupeIntegration], - ) + sentry_init(before_send=before_send, before_breadcrumb=before_breadcrumb) events = capture_events() monkeypatch.setattr(