From bae409bcd64b2f41ac2488ba9e1a8edff0a7744a Mon Sep 17 00:00:00 2001 From: Tahlia Richardson <3069029+tahliar@users.noreply.github.com> Date: Mon, 4 Mar 2024 11:38:11 +1000 Subject: [PATCH] Add formulas for calculating diskless SBD timeouts (#370) * Add formulas for calculating diskless SBD timeouts bsc#1219972 jsc#DOCTEAM-1289 * Add warning about diskless SBD timeout misconfiguration --- xml/ha_storage_protection.xml | 38 ++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/xml/ha_storage_protection.xml b/xml/ha_storage_protection.xml index 975db380..32fb8c44 100644 --- a/xml/ha_storage_protection.xml +++ b/xml/ha_storage_protection.xml @@ -347,8 +347,9 @@ This timeout is set in the CIB as a global cluster property. If not set explicitly, it defaults to 0, which is appropriate for - using SBD with one to three devices. For use of SBD in diskless mode, see for more details. + using SBD with one to three devices. For SBD in diskless mode, this timeout + must not be 0. For details, see + . @@ -995,8 +996,10 @@ SBD_WATCHDOG_TIMEOUT=5 properties on the &crmshell;: &prompt.crm.conf;property stonith-enabled="true" -&prompt.crm.conf;property stonith-watchdog-timeout=10 +&prompt.crm.conf;property stonith-watchdog-timeout=10 +&prompt.crm.conf;property stonith-timeout=15 @@ -1007,12 +1010,29 @@ SBD_WATCHDOG_TIMEOUT=5 For diskless SBD, this parameter must not equal zero. It defines after how long it is assumed that the fencing target has already - self-fenced. Therefore its value needs to be >= the value of - SBD_WATCHDOG_TIMEOUT in /etc/sysconfig/sbd. - Starting with &productname; 15, if you set stonith-watchdog-timeout - to a negative value, Pacemaker will automatically calculate this timeout - and set it to twice the value of SBD_WATCHDOG_TIMEOUT. + self-fenced. 
Use the following formula to calculate this timeout: + stonith-watchdog-timeout >= (SBD_WATCHDOG_TIMEOUT * 2) + + If you set stonith-watchdog-timeout + to a negative value, Pacemaker automatically calculates this timeout + and sets it to twice the value of SBD_WATCHDOG_TIMEOUT. + + + + + This parameter must allow sufficient time for fencing to complete. + For diskless SBD, use the following formula to calculate this timeout: + + stonith-timeout >= (stonith-watchdog-timeout * 1.2) + + Diskless SBD timeouts + + With diskless SBD, if the stonith-timeout value is smaller than the + stonith-watchdog-timeout value, failed nodes can become stuck + in an UNCLEAN state and block failover of active resources. + +