diff --git a/keep/api/bl/maintenance_windows_bl.py b/keep/api/bl/maintenance_windows_bl.py index dd3b9fb255..7b34c2cede 100644 --- a/keep/api/bl/maintenance_windows_bl.py +++ b/keep/api/bl/maintenance_windows_bl.py @@ -59,61 +59,67 @@ def check_if_alert_in_maintenance_windows(self, alert: AlertDto) -> bool: env = celpy.Environment() for maintenance_rule in self.maintenance_rules: - if alert.status in maintenance_rule.ignore_statuses: - self.logger.debug( - "Alert status is set to be ignored, ignoring maintenance windows", - extra={"tenant_id": self.tenant_id}, - ) - continue - - if maintenance_rule.end_time.replace(tzinfo=datetime.UTC) <= datetime.datetime.now(datetime.UTC): - # this is wtf error, should not happen because of query in init - self.logger.error( - "Fetched maintenance window which already ended by mistake, should not happen!" - ) - continue + try: + if alert.status in maintenance_rule.ignore_statuses: + self.logger.debug( + "Alert status is set to be ignored, ignoring maintenance windows", + extra={"tenant_id": self.tenant_id}, + ) + continue - cel_result = MaintenanceWindowsBl.evaluate_cel(maintenance_rule, alert, env, self.logger, extra) + if maintenance_rule.end_time.replace(tzinfo=datetime.UTC) <= datetime.datetime.now(datetime.UTC): + # this is wtf error, should not happen because of query in init + self.logger.error( + "Fetched maintenance window which already ended by mistake, should not happen!" 
+ ) + continue - if cel_result: - self.logger.info( - "Alert is in maintenance window", - extra={**extra, "maintenance_rule_id": maintenance_rule.id}, - ) + cel_result = MaintenanceWindowsBl.evaluate_cel(maintenance_rule, alert, env, self.logger, extra) - try: - audit = AlertAudit( - tenant_id=self.tenant_id, - fingerprint=alert.fingerprint, - user_id="Keep", - action=ActionType.MAINTENANCE.value, - description=( - f"Alert in maintenance due to rule `{maintenance_rule.name}`" - if not maintenance_rule.suppress - else f"Alert suppressed due to maintenance rule `{maintenance_rule.name}`" - ), - ) - self.session.add(audit) - self.session.commit() - except Exception: - self.logger.exception( - "Failed to write audit for alert maintenance window", - extra={ - "tenant_id": self.tenant_id, - "fingerprint": alert.fingerprint, - }, + if cel_result: + self.logger.info( + "Alert is in maintenance window", + extra={**extra, "maintenance_rule_id": maintenance_rule.id}, ) - if maintenance_rule.suppress: - # If user chose to suppress the alert, let it in but override the status. - if MAINTENANCE_WINDOW_ALERT_STRATEGY == "recover_previous_status": - alert.previous_status = alert.status - alert.status = AlertStatus.MAINTENANCE.value - else: - alert.status = AlertStatus.SUPPRESSED.value - return False + try: + audit = AlertAudit( + tenant_id=self.tenant_id, + fingerprint=alert.fingerprint, + user_id="Keep", + action=ActionType.MAINTENANCE.value, + description=( + f"Alert in maintenance due to rule `{maintenance_rule.name}`" + if not maintenance_rule.suppress + else f"Alert suppressed due to maintenance rule `{maintenance_rule.name}`" + ), + ) + self.session.add(audit) + self.session.commit() + except Exception: + self.logger.exception( + "Failed to write audit for alert maintenance window", + extra={ + "tenant_id": self.tenant_id, + "fingerprint": alert.fingerprint, + }, + ) + + if maintenance_rule.suppress: + # If user chose to suppress the alert, let it in but override the status. 
+ if MAINTENANCE_WINDOW_ALERT_STRATEGY == "recover_previous_status": + alert.previous_status = alert.status + alert.status = AlertStatus.MAINTENANCE.value + else: + alert.status = AlertStatus.SUPPRESSED.value + return False - return True + return True + except Exception: + self.logger.exception( + "Error while evaluating maintenance window CEL expression", + extra={**extra, "maintenance_rule_id": maintenance_rule.id}, + ) self.logger.info("Alert is not in maintenance window", extra=extra) return False diff --git a/tests/test_maintenance_windows_bl.py b/tests/test_maintenance_windows_bl.py index 6223cf862b..4a852b5834 100644 --- a/tests/test_maintenance_windows_bl.py +++ b/tests/test_maintenance_windows_bl.py @@ -554,4 +554,31 @@ def test_strategy_alert_execution_wf( #THEN The WF is not executed if there is a resolved alert or executed 1 time if there are only firing alerts n_executions = get_workflow_executions(SINGLE_TENANT_UUID, workflow.id)[0] - assert n_executions == executions \ No newline at end of file + assert n_executions == executions + + +def test_maintenance_window_cel_evaluation_exception_handling( + mock_session, active_maintenance_window_rule, alert_dto +): + """ + Feature: Generic - check_if_alert_in_maintenance_windows method exception handling + Scenario: When there is an exception checking the parameters inside the + check_if_alert_in_maintenance_windows method, it should be handled and + the method should return False. + This prevents the system from crashing and lets the main flow continue. 
+ """ + + # GIVEN an active maintenance window with an erroneous CEL expression + active_maintenance_window_rule.cel_query = r'service.matches("(?i)^[10(\..*)?$")' + mock_session.query.return_value.filter.return_value.filter.return_value.filter.return_value.filter.return_value.all.return_value = [ + active_maintenance_window_rule + ] + + maintenance_window_bl = MaintenanceWindowsBl( + tenant_id="test-tenant", session=mock_session + ) + # WHEN it checks if the alert is in maintenance windows + result = maintenance_window_bl.check_if_alert_in_maintenance_windows(alert_dto) + + # THEN it must return a boolean value, False in this case + assert result is False \ No newline at end of file