diff --git a/src/sentry/features/temporary.py b/src/sentry/features/temporary.py index 0da81a26bb6836..b5a17ac2f18c65 100644 --- a/src/sentry/features/temporary.py +++ b/src/sentry/features/temporary.py @@ -642,6 +642,8 @@ def register_temporary_features(manager: FeatureManager) -> None: manager.add("organizations:sentry-app-manual-token-refresh", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False) # Enables Conduit demo endpoint and UI manager.add("organizations:conduit-demo", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) + # Enable hard timeout alarm for webhooks + manager.add("organizations:sentry-app-webhook-hard-timeout", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False) # Enables organization access to the new notification platform manager.add("organizations:notification-platform.internal-testing", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) diff --git a/src/sentry/options/defaults.py b/src/sentry/options/defaults.py index e9efe8d2e33e25..464d2fe26097a8 100644 --- a/src/sentry/options/defaults.py +++ b/src/sentry/options/defaults.py @@ -2423,6 +2423,13 @@ flags=FLAG_AUTOMATOR_MODIFIABLE, ) +# Hard timeout for webhook requests to prevent indefinite hangs +register( + "sentry-apps.webhook.hard-timeout.sec", + default=5.0, + flags=FLAG_AUTOMATOR_MODIFIABLE, +) + # Enables statistical detectors for a project register( "statistical_detectors.enable", diff --git a/src/sentry/sentry_apps/metrics.py b/src/sentry/sentry_apps/metrics.py index dfd9d90fc279a7..aad977fa71eb26 100644 --- a/src/sentry/sentry_apps/metrics.py +++ b/src/sentry/sentry_apps/metrics.py @@ -70,6 +70,7 @@ class SentryAppWebhookHaltReason(StrEnum): MISSING_INSTALLATION = "missing_installation" RESTRICTED_IP = "restricted_ip" CONNECTION_RESET = "connection_reset" + HARD_TIMEOUT = "hard_timeout" class SentryAppExternalRequestFailureReason(StrEnum): diff --git a/src/sentry/utils/sentry_apps/webhooks.py b/src/sentry/utils/sentry_apps/webhooks.py index 97399851e0306a..c7627b45425353 100644 --- a/src/sentry/utils/sentry_apps/webhooks.py +++ b/src/sentry/utils/sentry_apps/webhooks.py @@ -2,6 +2,7 @@ import logging from collections.abc import Callable, Mapping +from types import FrameType from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar import sentry_sdk @@ -9,9 +10,10 @@ from requests.exceptions import ChunkedEncodingError, ConnectionError, Timeout from rest_framework import status -from sentry import options +from sentry import features, options from sentry.exceptions import RestrictedIPAddress from sentry.http import safe_urlopen +from sentry.organizations.services.organization.service import organization_service from sentry.sentry_apps.metrics import ( SentryAppEventType, SentryAppWebhookFailureReason, @@ -20,6 +22,7 @@ from sentry.sentry_apps.models.sentry_app import SentryApp, track_response_code from sentry.sentry_apps.utils.errors import SentryAppSentryError from sentry.shared_integrations.exceptions import ApiHostError, ApiTimeoutError, ClientError +from sentry.taskworker.workerchild import timeout_alarm from sentry.utils.sentry_apps import SentryAppWebhookRequestsBuffer if TYPE_CHECKING: @@ -36,6 +39,23 @@ T = TypeVar("T", bound=Mapping[str, Any]) +class WebhookTimeoutError(BaseException): + """This error represents a user set hard timeout for when a + webhook request should've completed within X seconds + """ + + pass + + +def _handle_webhook_timeout(signum: int, frame: FrameType | None) -> None: + """Handler for when a webhook request exceeds the hard timeout deadline. + - This is a workaround for safe_create_connection sockets hanging when the given url + cannot be reached or resolved. + - TODO(christinarlong): Add sentry app disabling logic here + """ + raise WebhookTimeoutError("Webhook request exceeded hard timeout deadline") + + def ignore_unpublished_app_errors( func: Callable[Concatenate[SentryApp | RpcSentryApp, P], R], ) -> Callable[Concatenate[SentryApp | RpcSentryApp, P], R | None]: @@ -99,12 +119,34 @@ def send_and_save_webhook_request( assert url is not None try: - response = safe_urlopen( - url=url, - data=app_platform_event.body, - headers=app_platform_event.headers, - timeout=options.get("sentry-apps.webhook.timeout.sec"), + organization_context = organization_service.get_organization_by_id( + id=app_platform_event.install.organization_id, + ) + if organization_context is not None and features.has( + "organizations:sentry-app-webhook-hard-timeout", + organization_context.organization, + ): + timeout_seconds = int(options.get("sentry-apps.webhook.hard-timeout.sec")) + with timeout_alarm(timeout_seconds, _handle_webhook_timeout): + response = safe_urlopen( + url=url, + data=app_platform_event.body, + headers=app_platform_event.headers, + timeout=options.get("sentry-apps.webhook.timeout.sec"), + ) + else: + response = safe_urlopen( + url=url, + data=app_platform_event.body, + headers=app_platform_event.headers, + timeout=options.get("sentry-apps.webhook.timeout.sec"), + ) + + except WebhookTimeoutError: + lifecycle.record_halt( + halt_reason=f"send_and_save_webhook_request.{SentryAppWebhookHaltReason.HARD_TIMEOUT}" ) + raise except (Timeout, ConnectionError) as e: error_type = e.__class__.__name__.lower() lifecycle.add_extras( diff --git a/tests/sentry/utils/sentry_apps/test_webhook_timeout.py b/tests/sentry/utils/sentry_apps/test_webhook_timeout.py new file mode 100644 index 00000000000000..475fcf2882492b --- /dev/null +++ b/tests/sentry/utils/sentry_apps/test_webhook_timeout.py @@ -0,0 +1,197 @@ +import signal +import time +from unittest.mock import Mock, patch + +import pytest +from requests import Response + +from sentry.sentry_apps.api.serializers.app_platform_event import AppPlatformEvent +from sentry.sentry_apps.metrics import SentryAppWebhookHaltReason +from sentry.testutils.asserts import assert_halt_metric +from sentry.testutils.cases import TestCase +from sentry.testutils.helpers.features import with_feature +from sentry.testutils.helpers.options import override_options +from sentry.testutils.silo import region_silo_test +from sentry.utils.sentry_apps.webhooks import WebhookTimeoutError, send_and_save_webhook_request + + +@region_silo_test +class WebhookTimeoutTest(TestCase): + def setUp(self): + self.organization = self.create_organization() + self.sentry_app = self.create_sentry_app( + name="TestApp", + organization=self.organization, + webhook_url="https://example.com/webhook", + ) + self.install = self.create_sentry_app_installation( + organization=self.organization, slug=self.sentry_app.slug + ) + + @with_feature("organizations:sentry-app-webhook-hard-timeout") + @override_options( + { + "sentry-apps.webhook.hard-timeout.sec": 5.0, + "sentry-apps.webhook.timeout.sec": 1.0, + "sentry-apps.webhook.restricted-webhook-sending": [], + "sentry-apps.webhook-logging.enabled": {"installation_uuid": [], "sentry_app_slug": []}, + } + ) + @patch("sentry.utils.sentry_apps.webhooks.safe_urlopen") + def test_webhook_completes_before_timeout(self, mock_safe_urlopen): + # Mock successful response + mock_response = Mock(spec=Response) + mock_response.status_code = 200 + mock_response.headers = {} + mock_safe_urlopen.return_value = mock_response + + app_platform_event = AppPlatformEvent( + resource="issue", action="created", install=self.install, data={"test": "data"} + ) + + # Should complete without raising WebhookTimeoutError + response = send_and_save_webhook_request(self.sentry_app, app_platform_event) + + assert response == mock_response + mock_safe_urlopen.assert_called_once() + + @with_feature("organizations:sentry-app-webhook-hard-timeout") + @override_options( + { + "sentry-apps.webhook.hard-timeout.sec": 1.0, + "sentry-apps.webhook.timeout.sec": 10.0, + "sentry-apps.webhook.restricted-webhook-sending": [], + "sentry-apps.webhook-logging.enabled": {"installation_uuid": [], "sentry_app_slug": []}, + } + ) + @patch("sentry.utils.sentry_apps.webhooks.safe_urlopen") + def test_webhook_interrupted_by_hard_timeout(self, mock_safe_urlopen): + # Make safe_urlopen sleep longer than the timeout + def slow_urlopen(*args, **kwargs): + time.sleep(6.0) # Sleep longer than 1 second timeout + mock_response = Mock(spec=Response) + mock_response.status_code = 200 + return mock_response + + mock_safe_urlopen.side_effect = slow_urlopen + + # Create event + app_platform_event = AppPlatformEvent( + resource="issue", action="created", install=self.install, data={"test": "data"} + ) + + # Should raise WebhookTimeoutError + with pytest.raises(WebhookTimeoutError, match="Webhook request exceeded hard timeout"): + send_and_save_webhook_request(self.sentry_app, app_platform_event) + + @with_feature("organizations:sentry-app-webhook-hard-timeout") + @override_options( + { + "sentry-apps.webhook.hard-timeout.sec": 1.0, + "sentry-apps.webhook.timeout.sec": 10.0, + "sentry-apps.webhook.restricted-webhook-sending": [], + } + ) + @patch("sentry.utils.sentry_apps.webhooks.safe_urlopen") + def test_timeout_exception_propagates(self, mock_safe_urlopen): + # Make safe_urlopen sleep + def slow_urlopen(*args, **kwargs): + time.sleep(2.0) + return Mock(spec=Response) + + mock_safe_urlopen.side_effect = slow_urlopen + + app_platform_event = AppPlatformEvent( + resource="issue", action="created", install=self.install, data={"test": "data"} + ) + + with pytest.raises(WebhookTimeoutError): + send_and_save_webhook_request(self.sentry_app, app_platform_event) + + @with_feature("organizations:sentry-app-webhook-hard-timeout") + @override_options( + { + "sentry-apps.webhook.hard-timeout.sec": 1.0, + "sentry-apps.webhook.timeout.sec": 10.0, + "sentry-apps.webhook.restricted-webhook-sending": [], + } + ) + @patch("sentry.utils.sentry_apps.webhooks.safe_urlopen") + @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") + def test_lifecycle_record_halt_called_on_timeout(self, mock_record, mock_safe_urlopen): + # Make safe_urlopen sleep + def slow_urlopen(*args, **kwargs): + time.sleep(2.0) + return Mock(spec=Response) + + mock_safe_urlopen.side_effect = slow_urlopen + + app_platform_event = AppPlatformEvent( + resource="issue", action="created", install=self.install, data={"test": "data"} + ) + + with pytest.raises(WebhookTimeoutError): + send_and_save_webhook_request(self.sentry_app, app_platform_event) + + assert_halt_metric( + mock_record=mock_record, + error_msg=f"send_and_save_webhook_request.{SentryAppWebhookHaltReason.HARD_TIMEOUT}", + ) + + @with_feature("organizations:sentry-app-webhook-hard-timeout") + @override_options( + { + "sentry-apps.webhook.hard-timeout.sec": 5.0, + "sentry-apps.webhook.timeout.sec": 10.0, + "sentry-apps.webhook.restricted-webhook-sending": [], + "sentry-apps.webhook-logging.enabled": {"installation_uuid": [], "sentry_app_slug": []}, + } + ) + @patch("sentry.utils.sentry_apps.webhooks.safe_urlopen") + def test_timeout_alarm_restores_signal_handler(self, mock_safe_urlopen): + # Get original handler + original_handler = signal.signal(signal.SIGALRM, signal.SIG_DFL) + signal.signal(signal.SIGALRM, original_handler) + + # Mock quick response + mock_response = Mock(spec=Response) + mock_response.status_code = 200 + mock_response.headers = {} + mock_safe_urlopen.return_value = mock_response + + app_platform_event = AppPlatformEvent( + resource="issue", action="created", install=self.install, data={"test": "data"} + ) + + send_and_save_webhook_request(self.sentry_app, app_platform_event) + + # Verify signal handler was restored + current_handler = signal.signal(signal.SIGALRM, signal.SIG_DFL) + signal.signal(signal.SIGALRM, current_handler) + assert current_handler == original_handler + + @with_feature("organizations:sentry-app-webhook-hard-timeout") + @override_options( + { + "sentry-apps.webhook.hard-timeout.sec": 5.0, + "sentry-apps.webhook.timeout.sec": 1.0, + "sentry-apps.webhook.restricted-webhook-sending": [], + "sentry-apps.webhook-logging.enabled": {"installation_uuid": [], "sentry_app_slug": []}, + } + ) + @patch("sentry.utils.sentry_apps.webhooks.safe_urlopen") + def test_alarm_cancelled_after_successful_webhook(self, mock_safe_urlopen): + mock_response = Mock(spec=Response) + mock_response.status_code = 200 + mock_response.headers = {} + mock_safe_urlopen.return_value = mock_response + + app_platform_event = AppPlatformEvent( + resource="issue", action="created", install=self.install, data={"test": "data"} + ) + + send_and_save_webhook_request(self.sentry_app, app_platform_event) + + # Verify no alarm is pending + remaining = signal.alarm(0) + assert remaining == 0 # No alarm was pending