From c9ef61f42df67c3e63045d7a5e8c192a9df0a07c Mon Sep 17 00:00:00 2001 From: lpinne Date: Tue, 15 Aug 2023 11:24:43 +0200 Subject: [PATCH 01/48] susChkSrv.py.7: example killing hdbindexserver --- man/susChkSrv.py.7 | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/man/susChkSrv.py.7 b/man/susChkSrv.py.7 index 5338b379..e0946a63 100644 --- a/man/susChkSrv.py.7 +++ b/man/susChkSrv.py.7 @@ -351,6 +351,27 @@ It does not touch any fencing device. # crm_attribute -t status -N 'node2' -G -n terminate .RE .PP +\fB*\fR Example for killing HANA hdbindexserver process. +.PP +This could be done for testing the HA/DR provider hook script integration. +Killing HANA processes is dangerous. This test should not be done +on production systems. +Please refer to SAP HANA documentation. See also manual page killall(1). +.br +Note: Understand the impact before trying. +.PP +1. Check HANA and Linux cluster for clean idle state. +.PP +2. On secondary master name server, kill the hdbindexserver process. +.RS 2 +# killall -9 hdbindexserver +.RE +.PP +3. Check the nameserver tracefile for srServiceStateChanged() events. +.PP +4. Check HANA and Linux cluster for clean idle state. +.RE +.PP .\" .SH FILES .TP @@ -423,7 +444,7 @@ Please report any other feedback and suggestions to feedback@suse.com. \fBocf_suse_SAPHanaTopology\fP(7) , \fBocf_suse_SAPHanaController\fP(7) , \fBSAPHanaSR-hookHelper\fP(8) , \fBSAPHanaSR-manageProvider\fP(8) , \fBcrm\fP(8) , \fBcrm_attribute\fP(8) , -\fBpython3\fP(8) , +\fBpython3\fP(8) , \fBkillall\fP(1) , .br https://help.sap.com/docs/SAP_HANA_PLATFORM?locale=en-US .br From 5045d995329cbe4e7279c21153842b2db041d3cc Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Tue, 15 Aug 2023 12:05:16 +0200 Subject: [PATCH 02/48] angi: tester: callTest-multiNode - parameter handling fixed --- test/callTest-multiNode | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/test/callTest-multiNode b/test/callTest-multiNode index 83fdcd09..b72c680b 100755 --- a/test/callTest-multiNode +++ b/test/callTest-multiNode @@ -41,27 +41,36 @@ while [ $# -gt 0 ]; do case "$1" in --test_scenario=* ) test_scenario=${1#*=} + echo "param: test_scenario=$test_scenario" + shift ;; --nodes ) shift - while [[ "$1" != "" && "$1" != "--*" ]]; do - nodes="$nodes ${1#*=}" + while [ $# -gt 0 ]; do + case "$1" in + --* ) break;; + * ) + nodes="$nodes ${1#*=}";; + esac shift - echo $nodes done + echo "param: nodes=$nodes" ;; --test_case=* ) test_case=${1#*=} + echo "param: test_case=$test_case" + shift ;; --properties=* ) test_prop=${1#*=} + echo "param: properties=$test_prop" + shift ;; --help* ) usage exit 2 ;; esac - shift done test_dir="/usr/share/SAPHanaSR-tester/json/$test_scenario" From b708e1df209b207336e2b2237a6b00c98365eced Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Tue, 15 Aug 2023 12:06:31 +0200 Subject: [PATCH 03/48] angi: tester: SAPHanaSR-testCluster-html - failure-id initialization --- test/SAPHanaSR-testCluster-html | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/test/SAPHanaSR-testCluster-html b/test/SAPHanaSR-testCluster-html index be2def08..1ffe604a 100755 --- a/test/SAPHanaSR-testCluster-html +++ b/test/SAPHanaSR-testCluster-html @@ -121,8 +121,18 @@ class SshMultiNode: 'fail-prereq': 0, 'fail-recovery': 0, 'fail-any': 0, - 'failed-id': {}, - 'failed-tests': {}, + 'failed-id': { + 'n/a 01': 'n/a', + 'n/a 02': 'n/a', + 'n/a 03': 'n/a', + 'n/a 04': 'n/a' + }, + 'failed-tests': { + 'n/a 01': 
'n/a', + 'n/a 02': 'n/a', + 'n/a 03': 'n/a', + 'n/a 04': 'n/a' + }, 'skipped': {}, 'succ-any': 0, 'succ-prereq': 0, @@ -300,6 +310,7 @@ while True: lines[line_index] = line line_index += 1 + # TODO improve initialization failed_test_names_sorted_first4 = list(test01.stat['failed-tests-rev'].keys())[0:4] # top 4 print(f"top : {failed_test_names_sorted_first4}") print(f"ids : {str(test01.stat['failed-id'])}") From 456749d704e9776ea2e72f0d85c71a0e9c3cbbc4 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Wed, 16 Aug 2023 12:15:35 +0200 Subject: [PATCH 04/48] angi: tester: json - worked on expectation of test scenarios angi-ScaleOut and angi-ScaleOut-BW --- .../kill_prim_indexserver.json | 2 +- .../json/angi-ScaleOut-BW/kill_prim_inst.json | 2 +- .../kill_prim_worker_inst.json | 2 +- .../kill_secn_indexserver.json | 2 +- .../json/angi-ScaleOut-BW/kill_secn_inst.json | 2 +- .../kill_secn_worker_inst.json | 2 +- .../angi-ScaleOut/kill_prim_indexserver.json | 13 +++---- test/json/angi-ScaleOut/kill_prim_inst.json | 15 ++++---- .../angi-ScaleOut/kill_prim_worker_inst.json | 11 +++--- .../angi-ScaleOut/kill_prim_worker_node.json | 2 +- .../angi-ScaleOut/kill_secn_indexserver.json | 2 +- test/json/angi-ScaleOut/kill_secn_inst.json | 4 +-- .../angi-ScaleOut/kill_secn_worker_inst.json | 36 +++++-------------- .../angi-ScaleOut/kill_secn_worker_node.json | 28 +++------------ 14 files changed, 42 insertions(+), 81 deletions(-) diff --git a/test/json/angi-ScaleOut-BW/kill_prim_indexserver.json b/test/json/angi-ScaleOut-BW/kill_prim_indexserver.json index 67ed095e..3be3fedd 100644 --- a/test/json/angi-ScaleOut-BW/kill_prim_indexserver.json +++ b/test/json/angi-ScaleOut-BW/kill_prim_indexserver.json @@ -22,7 +22,7 @@ "loop": 120, "wait": 2, "pSite": [ - "lss=[12]" , + "lss=(1|2)" , "srr=P" , "lpt=(1[6-9]........|20)" , "srHook=(PRIM|SWAIT|SREG)" , diff --git a/test/json/angi-ScaleOut-BW/kill_prim_inst.json b/test/json/angi-ScaleOut-BW/kill_prim_inst.json index 21c29c74..6fea865a 100644 --- a/test/json/angi-ScaleOut-BW/kill_prim_inst.json +++ b/test/json/angi-ScaleOut-BW/kill_prim_inst.json @@ -24,7 +24,7 @@ "loop": 120, "wait": 2, "pSite": [ - "lss=[12]" , + "lss=(1|2)" , "srr=P" , "lpt=(1[6-9]........|20)" , "srHook=(PRIM|SWAIT|SREG)" , diff --git a/test/json/angi-ScaleOut-BW/kill_prim_worker_inst.json b/test/json/angi-ScaleOut-BW/kill_prim_worker_inst.json index edd373fb..1e41df0a 100644 --- a/test/json/angi-ScaleOut-BW/kill_prim_worker_inst.json +++ b/test/json/angi-ScaleOut-BW/kill_prim_worker_inst.json @@ -24,7 +24,7 @@ "loop": 120, "wait": 2, "pSite": [ - "lss=[12]" , + "lss=(1|2)" , "srr=P" , "lpt=(1[6-9]........|20)" , "srHook=(PRIM|SWAIT|SREG)" , diff --git a/test/json/angi-ScaleOut-BW/kill_secn_indexserver.json b/test/json/angi-ScaleOut-BW/kill_secn_indexserver.json index d82615fe..409b37d3 100644 --- a/test/json/angi-ScaleOut-BW/kill_secn_indexserver.json +++ b/test/json/angi-ScaleOut-BW/kill_secn_indexserver.json @@ -30,7 +30,7 @@ ], "sSite": [ "lpt=(10|30)", - "lss=[12]", + "lss=(1|2)", "srr=S", "srHook=SFAIL", "srPoll=(SFAIL|SOK)" diff --git a/test/json/angi-ScaleOut-BW/kill_secn_inst.json b/test/json/angi-ScaleOut-BW/kill_secn_inst.json index ef4af67a..ee928ef7 100644 --- a/test/json/angi-ScaleOut-BW/kill_secn_inst.json +++ b/test/json/angi-ScaleOut-BW/kill_secn_inst.json @@ -30,7 +30,7 @@ ], "sSite": [ "lpt=(10|30)", - "lss=[12]", + "lss=(1|2)", "srr=S", "srHook=SFAIL", "srPoll=(SFAIL|SOK)" diff --git a/test/json/angi-ScaleOut-BW/kill_secn_worker_inst.json 
b/test/json/angi-ScaleOut-BW/kill_secn_worker_inst.json index 33f9ecc0..775b4b94 100644 --- a/test/json/angi-ScaleOut-BW/kill_secn_worker_inst.json +++ b/test/json/angi-ScaleOut-BW/kill_secn_worker_inst.json @@ -30,7 +30,7 @@ ], "sSite": [ "lpt=(10|30)", - "lss=[12]", + "lss=(1|2)", "srr=S", "srHook=SFAIL", "srPoll=(SFAIL|SOK)" diff --git a/test/json/angi-ScaleOut/kill_prim_indexserver.json b/test/json/angi-ScaleOut/kill_prim_indexserver.json index 67ed095e..31337a39 100644 --- a/test/json/angi-ScaleOut/kill_prim_indexserver.json +++ b/test/json/angi-ScaleOut/kill_prim_indexserver.json @@ -21,8 +21,9 @@ "next": "step30", "loop": 120, "wait": 2, + "comment": "sSite: srPoll could get SFAIL on scale-out", "pSite": [ - "lss=[12]" , + "lss=(1|2)" , "srr=P" , "lpt=(1[6-9]........|20)" , "srHook=(PRIM|SWAIT|SREG)" , @@ -33,12 +34,12 @@ "lss=4", "srr=S", "srHook=(PRIM|SOK)", - "srPoll=SOK" + "srPoll=(SOK|SFAIL)" ], "pHost": [ "clone_state=(PROMOTED|DEMOTED|UNDEFINED)" , "roles=master1::worker:" , - "score=(90|5|0)" + "score=(90|70|5|0)" ], "sHost": [ "clone_state=(PROMOTED|DEMOTED)" , @@ -65,12 +66,12 @@ "lss=4", "srr=(S|P)", "srHook=PRIM", - "srPoll=SOK" + "srPoll=(SOK|SFAIL)" ], "pHost": [ "clone_state=(UNDEFINED|DEMOTED)" , "roles=master1::worker:" , - "score=(90|5)" + "score=(90|70|5)" ], "sHost": [ "clone_state=(DEMOTED|PROMOTED)" , @@ -83,7 +84,7 @@ "step": "step40", "name": "end recover", "next": "END", - "loop": 120, + "loop": 360, "wait": 2, "post": "cleanup", "remark": "pXXX and sXXX are now exchanged", diff --git a/test/json/angi-ScaleOut/kill_prim_inst.json b/test/json/angi-ScaleOut/kill_prim_inst.json index 21c29c74..838f026e 100644 --- a/test/json/angi-ScaleOut/kill_prim_inst.json +++ b/test/json/angi-ScaleOut/kill_prim_inst.json @@ -14,7 +14,7 @@ "todo1": "allow something like lss>2, lpt>10000, score!=123", "pSite": "pSiteUp", "sSite": "sSiteUp", - "pHost": "pHostUp", + "pHost": "pHostUp", "sHost": "sHostUp" }, { @@ -23,8 +23,9 @@ "next": "step30", "loop": 120, "wait": 2, + "comment": "sSite: srPoll could get SFAIL on scale-out", "pSite": [ - "lss=[12]" , + "lss=(1|2)" , "srr=P" , "lpt=(1[6-9]........|20)" , "srHook=(PRIM|SWAIT|SREG)" , @@ -35,12 +36,12 @@ "lss=4", "srr=S", "srHook=(PRIM|SOK)", - "srPoll=SOK" + "srPoll=(SOK|SFAIL)" ], "pHost": [ "clone_state=(PROMOTED|DEMOTED|UNDEFINED)" , "roles=master1::worker:" , - "score=(90|5|0)" + "score=(90|70|5|0)" ], "sHost": [ "clone_state=(PROMOTED|DEMOTED)" , @@ -67,12 +68,12 @@ "lss=4", "srr=(S|P)", "srHook=PRIM", - "srPoll=SOK" + "srPoll=(SOK|SFAIL)" ], "pHost": [ "clone_state=(UNDEFINED|DEMOTED)" , "roles=master1::worker:" , - "score=(90|5)" + "score=(90|70|5)" ], "sHost": [ "clone_state=(DEMOTED|PROMOTED)" , @@ -85,7 +86,7 @@ "step": "step40", "name": "end recover", "next": "END", - "loop": 300, + "loop": 360, "wait": 2, "post": "cleanup", "remark": "pXXX and sXXX are now exchanged", diff --git a/test/json/angi-ScaleOut/kill_prim_worker_inst.json b/test/json/angi-ScaleOut/kill_prim_worker_inst.json index edd373fb..59e6d205 100644 --- a/test/json/angi-ScaleOut/kill_prim_worker_inst.json +++ b/test/json/angi-ScaleOut/kill_prim_worker_inst.json @@ -24,7 +24,7 @@ "loop": 120, "wait": 2, "pSite": [ - "lss=[12]" , + "lss=(1|2)" , "srr=P" , "lpt=(1[6-9]........|20)" , "srHook=(PRIM|SWAIT|SREG)" , @@ -35,12 +35,11 @@ "lss=4", "srr=S", "srHook=(PRIM|SOK)", - "srPoll=SOK" + "srPoll=(SOK|SFAIL)" ], "pHost": [ "clone_state=(PROMOTED|DEMOTED|UNDEFINED)" , - "roles=master1::worker:" , - "score=(90|5|0)" + "score=(90|70|5|0)" ], "sHost": [ 
"clone_state=(PROMOTED|DEMOTED)", @@ -67,12 +66,12 @@ "lss=4", "srr=(S|P)", "srHook=PRIM", - "srPoll=SOK" + "srPoll=(SOK|SFAIL)" ], "pHost": [ "clone_state=(UNDEFINED|DEMOTED)" , "roles=master1::worker:", - "score=(90|5)" + "score=(90|70|5)" ], "sHost": [ "clone_state=(DEMOTED|PROMOTED)" , diff --git a/test/json/angi-ScaleOut/kill_prim_worker_node.json b/test/json/angi-ScaleOut/kill_prim_worker_node.json index 3e5abd40..e2256cd8 100644 --- a/test/json/angi-ScaleOut/kill_prim_worker_node.json +++ b/test/json/angi-ScaleOut/kill_prim_worker_node.json @@ -37,7 +37,7 @@ ], "pHost": [ "clone_state=(DEMOTED|UNDEFINED|WAITING4NODES)" , - "score=(70|5)" + "score=(90|70|5)" ], "sHost": [ "clone_state=(PROMOTED|DEMOTED)", diff --git a/test/json/angi-ScaleOut/kill_secn_indexserver.json b/test/json/angi-ScaleOut/kill_secn_indexserver.json index d82615fe..409b37d3 100644 --- a/test/json/angi-ScaleOut/kill_secn_indexserver.json +++ b/test/json/angi-ScaleOut/kill_secn_indexserver.json @@ -30,7 +30,7 @@ ], "sSite": [ "lpt=(10|30)", - "lss=[12]", + "lss=(1|2)", "srr=S", "srHook=SFAIL", "srPoll=(SFAIL|SOK)" diff --git a/test/json/angi-ScaleOut/kill_secn_inst.json b/test/json/angi-ScaleOut/kill_secn_inst.json index ef4af67a..95f2de32 100644 --- a/test/json/angi-ScaleOut/kill_secn_inst.json +++ b/test/json/angi-ScaleOut/kill_secn_inst.json @@ -30,7 +30,7 @@ ], "sSite": [ "lpt=(10|30)", - "lss=[12]", + "lss=(1|2)", "srr=S", "srHook=SFAIL", "srPoll=(SFAIL|SOK)" @@ -82,7 +82,7 @@ "step": "step40", "name": "end recover", "next": "END", - "loop": 120, + "loop": 240, "wait": 2, "post": "cleanup", "pSite": "pSiteUp", diff --git a/test/json/angi-ScaleOut/kill_secn_worker_inst.json b/test/json/angi-ScaleOut/kill_secn_worker_inst.json index 33f9ecc0..c75d63f9 100644 --- a/test/json/angi-ScaleOut/kill_secn_worker_inst.json +++ b/test/json/angi-ScaleOut/kill_secn_worker_inst.json @@ -11,7 +11,7 @@ "wait": 1, "post": "kill_secn_worker_inst", "pSite": "pSiteUp", - "sSite": "sSiteUp", + "sSite": "sSiteUp", "pHost": "pHostUp", "sHost": "sHostUp" }, @@ -21,27 +21,17 @@ "next": "step30", "loop": 120, "wait": 2, - "pSite": [ - "lss=4" , - "srr=P" , - "lpt=1[6-9]........" , - "srHook=PRIM" , - "srPoll=PRIM" - ], + "pSite": "pSiteUp", "sSite": [ "lpt=(10|30)", - "lss=[12]", + "lss=(1|2)", "srr=S", - "srHook=SFAIL", + "srHook=(SFAIL|SWAIT)", "srPoll=(SFAIL|SOK)" ], - "pHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" - ], + "pHost": "pHostUp", "sHost": [ - "clone_state=DEMOTED" , + "clone_state=(DEMOTED|UNDEFINED)" , "roles=master1::worker:" , "score=(-INFINITY|0)" ] @@ -53,13 +43,7 @@ "loop": 120, "wait": 2, "todo": "pHost+sHost to check site-name", - "pSite": [ - "lss=4" , - "srr=P" , - "lpt=1[6-9]........" , - "srHook=PRIM" , - "srPoll=PRIM" - ], + "pSite": "pSiteUp", "sSite": [ "lpt=10", "lss=(1|2)", @@ -67,11 +51,7 @@ "srHook=(SFAIL|SWAIT)", "srPoll=(SFAIL|SOK)" ], - "pHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" - ], + "pHost": "pHostUp", "sHost": [ "clone_state=(UNDEFINED|DEMOTED)" , "roles=master1::worker:" , diff --git a/test/json/angi-ScaleOut/kill_secn_worker_node.json b/test/json/angi-ScaleOut/kill_secn_worker_node.json index 1034e1d7..bfb3122f 100644 --- a/test/json/angi-ScaleOut/kill_secn_worker_node.json +++ b/test/json/angi-ScaleOut/kill_secn_worker_node.json @@ -21,13 +21,7 @@ "next": "step30", "loop": 120, "wait": 2, - "pSite": [ - "lss=4" , - "srr=P" , - "lpt=1[6-9]........" 
, - "srHook=PRIM" , - "srPoll=PRIM" - ], + "pSite": "pSiteUp", "sSite": [ "lpt=10", "lss=1", @@ -35,11 +29,7 @@ "srHook=SFAIL", "srPoll=SFAIL" ], - "pHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" - ], + "pHost": "pHostUp", "sHost": [ "clone_state=WAITING4NODES" ] @@ -51,13 +41,7 @@ "loop": 120, "wait": 2, "todo": "pHost+sHost to check site-name", - "pSite": [ - "lss=4" , - "srr=P" , - "lpt=1[6-9]........" , - "srHook=PRIM" , - "srPoll=PRIM" - ], + "pSite": "pSiteUp", "sSite": [ "lpt=10", "lss=(1|2)", @@ -65,11 +49,7 @@ "srHook=(SFAIL|SWAIT)", "srPoll=(SFAIL|SOK)" ], - "pHost": [ - "clone_state=PROMOTED" , - "roles=master1:master:worker:master" , - "score=150" - ], + "pHost": "pHostUp", "sHost": [ "clone_state=(UNDEFINED|DEMOTED)" , "roles=master1::worker:" , From a208af241d4b72dcd1b1203bd21cdb47ebee6f86 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Wed, 16 Aug 2023 12:17:38 +0200 Subject: [PATCH 05/48] angi: tester: saphana_sr_test.py - new actions for kill_*_worker_indexserver and improved logging for debugging purposes --- test/saphana_sr_test.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/test/saphana_sr_test.py b/test/saphana_sr_test.py index 2de3f2d4..82c6dc8d 100755 --- a/test/saphana_sr_test.py +++ b/test/saphana_sr_test.py @@ -289,7 +289,7 @@ def __get_failed__(self): return self.run['failed'] return None - def run_checks(self, checks, area_name, object_name ): + def run_checks(self, checks, area_name, object_name, step_step ): """ run all checks for area and object """ l_sr = self.dict_sr check_result = -1 @@ -318,7 +318,7 @@ def run_checks(self, checks, area_name, object_name ): if (found == 0) and (check_result < 2 ): check_result = 2 if self.config['dump_failures'] and 'failed' in self.run: - self.message(f"FAILED: {self.__get_failed__()}", stdout=False) + self.message(f"FAILED: step={step_step} {self.__get_failed__()}", stdout=False) return check_result def process_topology_object(self, step, topology_object_name, area_name): @@ -333,7 +333,7 @@ def process_topology_object(self, step, topology_object_name, area_name): topolo = self.topolo if topology_object_name in topolo: object_name = topolo[topology_object_name] - rc_checks = self.run_checks(checks, area_name, object_name) + rc_checks = self.run_checks(checks, area_name, object_name, step.get('step','')) return rc_checks def process_step(self, step): @@ -360,6 +360,7 @@ def process_step(self, step): f" step_name='{step_name}'" f" step_next={step_next}" f" step_action='{step_action}'" + f" max_loops='{max_loops}'" ) self.message(_l_msg) while loops < max_loops: @@ -475,6 +476,12 @@ def action_on_hana(self, action_name): elif action_name == "kill_secn_indexserver": remote = self.topolo['sHost'] cmd = "pkill -f -u {}adm --signal 11 hdbindexserver".format(test_sid.lower()) + elif action_name == "kill_prim_worker_indexserver": + remote = self.topolo['pWorker'] + cmd = "pkill -f -u {}adm --signal 11 hdbindexserver".format(test_sid.lower()) + elif action_name == "kill_secn_worker_indexserver": + remote = self.topolo['sWorker'] + cmd = "pkill -f -u {}adm --signal 11 hdbindexserver".format(test_sid.lower()) elif action_name == "bmt": remote = self.topolo['sHost'] cmd = "su - {}adm -c 'hdbnsutil -sr_takeover'".format(test_sid.lower()) @@ -538,7 +545,8 @@ def action(self, action_name): action_rc = 0 if action_name == "": action_rc = 0 - elif action_name_short in ("kill_prim_inst", "kill_prim_worker_inst", "kill_secn_inst", 
"kill_secn_worker_inst", "kill_prim_indexserver", "kill_secn_indexserver", "bmt"): + elif action_name_short in ("kill_prim_inst", "kill_prim_worker_inst", "kill_secn_inst", "kill_secn_worker_inst", "kill_prim_indexserver", "kill_secn_indexserver", + "kill_prim_worker_indexserver", "kill_secn_worker_indexserver" , "bmt"): action_rc = self.action_on_hana(action_name) elif action_name_short in ("ssn", "osn", "spn", "opn", "cleanup", "kill_secn_node", "kill_secn_worker_node", "kill_prim_node", "kill_prim_worker_node", "simulate_split_brain"): action_rc = self.action_on_cluster(action_name) From 5c8f9c09fc54844adbb11353021749657d344960 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Wed, 16 Aug 2023 12:22:23 +0200 Subject: [PATCH 06/48] angi: tester: SAPHanaSR-testCluster-html - fixed reset statistics and switched tile to ScaleOut --- test/SAPHanaSR-testCluster-html | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/SAPHanaSR-testCluster-html b/test/SAPHanaSR-testCluster-html index 1ffe604a..1a6b0215 100755 --- a/test/SAPHanaSR-testCluster-html +++ b/test/SAPHanaSR-testCluster-html @@ -128,10 +128,10 @@ class SshMultiNode: 'n/a 04': 'n/a' }, 'failed-tests': { - 'n/a 01': 'n/a', - 'n/a 02': 'n/a', - 'n/a 03': 'n/a', - 'n/a 04': 'n/a' + 'n/a 01': 0, + 'n/a 02': 0, + 'n/a 03': 0, + 'n/a 04': 0 }, 'skipped': {}, 'succ-any': 0, @@ -248,7 +248,7 @@ if args.sleep: seconds = int(time.time()) -test_scenario = "SAPHanaSR-angi - ScaleUp" +test_scenario = "SAPHanaSR-angi - ScaleOut" # html_out: use this as output-path later once SAPHanaSR-tester-html runs more out of its own (not in a bash script loop) html_out = 'TestSAPHanaSR-angi-ScaleOut.html' From ee8e5e3fa5b47f80a981190ab5a2085933d3a07b Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Wed, 16 Aug 2023 12:23:51 +0200 Subject: [PATCH 07/48] angi: tester: json - new tst case kill_prim_worker_indexserver in test scenario angi-SacleOut --- .../kill_prim_worker_indexserver.json | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 test/json/angi-ScaleOut/kill_prim_worker_indexserver.json diff --git a/test/json/angi-ScaleOut/kill_prim_worker_indexserver.json b/test/json/angi-ScaleOut/kill_prim_worker_indexserver.json new file mode 100644 index 00000000..ee4d17f0 --- /dev/null +++ b/test/json/angi-ScaleOut/kill_prim_worker_indexserver.json @@ -0,0 +1,95 @@ +{ + "test": "kill_prim_worker_indexserver", + "name": "Kill primary worker indexserver", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_prim_worker_indexserver", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": [ + "lss=(1|2)" , + "srr=P" , + "lpt=(1[6-9]........|20)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=S", + "srHook=(PRIM|SOK)", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + "clone_state=(PROMOTED|DEMOTED|UNDEFINED)" , + "score=(90|70|5|0)" + ], + "sHost": [ + "clone_state=(PROMOTED|DEMOTED)", + "roles=master1:master:worker:master" , + "score=(100|145)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "todo2": "why do we need SFAIL for srHook?", + "pSite": [ + "lss=1" , + "srr=P" , + 
"lpt=(1[6-9]........|30|20|10)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=(S|P)", + "srHook=(PRIM|SFAIL)", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + "clone_state=(UNDEFINED|DEMOTED)" , + "score=(90|70|5)" + ], + "sHost": [ + "clone_state=(DEMOTED|PROMOTED)" , + "roles=master1:master:worker:master" , + "score=(100|145)" , + "srah=T" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 360, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} From 54b029d02ff3ac90434afeed0d28fab02396467b Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Wed, 16 Aug 2023 16:39:12 +0200 Subject: [PATCH 08/48] angi: tester: test_restart_cluster_turn_hana - wait for SR before doing a handshake takeover --- test/test_restart_cluster_turn_hana | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/test/test_restart_cluster_turn_hana b/test/test_restart_cluster_turn_hana index c1513ca6..66ccb2d4 100755 --- a/test/test_restart_cluster_turn_hana +++ b/test/test_restart_cluster_turn_hana @@ -35,16 +35,25 @@ ssh "${node02}" 'crm cluster run "crm cluster stop"' echo "==== SUSE Cluster Stopped ====" -ssh "$node01" 'su - '"$sidadm"' -c "HDB start"' & -ssh "$node02" 'su - '"$sidadm"' -c "HDB start"' & -wait +ssh "$node01" 'su - '"$sidadm"' -c "sapcontrol -nr '$instNr' -function StartSystem"' +ssh "$node02" 'su - '"$sidadm"' -c "sapcontrol -nr '$instNr' -function StartSystem"' +ssh "$node01" 'su - '"$sidadm"' -c "sapcontrol -nr '$instNr' -function WaitforStarted 300 10"' +ssh "$node02" 'su - '"$sidadm"' -c "sapcontrol -nr '$instNr' -function WaitforStarted 300 10"' + +while true; do + ssh "$currPrimary" 'su - '"$sidadm"' -c "cdpy; python3 systemReplicationStatus.py 1>/dev/null"'; rc=$? 
+ if [[ "$rc" != 15 ]]; then + sleep 60 + else + break + fi +done echo "==== SAP HANA DBs started ====" ssh "$currSecondary" 'su - '"$sidadm"' -c "hdbnsutil -sr_takeover --suspendPrimary"' ssh "$currPrimary" 'su - '"$sidadm"' -c "hdbnsutil -sr_register --remoteHost='"$currSecondary"' --remoteInstance='"$instNr"' --name='"$sitePrimary"' --replicationMode='"$srMode"' --operationMode='"$opMode"' --online"' - ssh "$node01" 'crm cluster run "crm cluster start"' echo "==== SUSE Cluster Started ====" From 2ee29c022221910298cc2af9d5293383524e8bcb Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Wed, 16 Aug 2023 16:41:38 +0200 Subject: [PATCH 09/48] angi: tester: saphana_sr_test.py - added flush for log messages --- test/saphana_sr_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/saphana_sr_test.py b/test/saphana_sr_test.py index 82c6dc8d..400939ad 100755 --- a/test/saphana_sr_test.py +++ b/test/saphana_sr_test.py @@ -39,14 +39,15 @@ def message(self, msg, **kwargs): r_id = "" msg_arr = msg.split(" ") if stdout: - print("{}{} {:<9s} {}".format(date_time, r_id, msg_arr[0], " ".join(msg_arr[1:]))) + print("{}{} {:<9s} {}".format(date_time, r_id, msg_arr[0], " ".join(msg_arr[1:])), flush=True) try: if self.run['log_file_handle']: _l_msg = f"{date_time}{r_id} {msg_arr[0]:9}" _l_msg += ' '.join(msg_arr[1:]) self.run['log_file_handle'].write(_l_msg + "\n") + self.run['log_file_handle'].flush() except OSError: - print("{0} {1:<9s} {2}".format(date_time, "ERROR:", "Could not write log log file")) + print("{0} {1:<9s} {2}".format(date_time, "ERROR:", "Could not write log log file"), flush=True) def __init__(self, *args, **kwargs): """ From 140ed1e79fa99072d5cd1108da3ba1b49f61a786 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Wed, 16 Aug 2023 16:43:27 +0200 Subject: [PATCH 10/48] angi: tester: sap.json - todo added; expectation-rules need to be improved --- test/json/angi-ScaleOut/sap.json | 1 + 1 file changed, 1 insertion(+) diff --git a/test/json/angi-ScaleOut/sap.json b/test/json/angi-ScaleOut/sap.json index 3eb30418..e4dcb21f 100644 --- a/test/json/angi-ScaleOut/sap.json +++ b/test/json/angi-ScaleOut/sap.json @@ -4,6 +4,7 @@ "start": "step10", "sid": "HA1", "mstResource": "ms_SAPHanaCon_HA1_HDB00", + "todo": "expectations needs to be fixed - e.g. step20 sHostDown is wrong, because topology will also be stopped. 
roles will be ::: not master1:...", "steps": [ { "step": "step10", From 6a7ed087307ec8c42502cb93fab06da62daeaecf Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Wed, 16 Aug 2023 16:49:05 +0200 Subject: [PATCH 11/48] angi: tester: test_restart_cluster - support ScaleOut --- test/test_restart_cluster | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/test/test_restart_cluster b/test/test_restart_cluster index b46500ac..eaa00461 100755 --- a/test/test_restart_cluster +++ b/test/test_restart_cluster @@ -14,6 +14,15 @@ dir_path=$(dirname "$full_path") source "$dir_path/test_properties" source .test_properties +currPrimary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=script" | awk -F'/' '/score="150"/ { print $2 }' )" +currSecondary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=script" | awk -F'/' '/score="100"/ { print $2 }' )" + +echo "p=$currPrimary, s=$currSecondary" + +sitePrimary=$(ssh "${node01}" "SAPHanaSR-showAttr --format=script" | awk -F'=' '$0 ~ node".site" { print $2 }' node="$currPrimary") +sitePrimary="${sitePrimary//\"/}" + +echo "p=$currPrimary ($sitePrimary), s=$currSecondary" # shellcheck disable=SC2029 ssh "$node01" "crm resource cleanup $mstResource" @@ -22,9 +31,19 @@ ssh "$node02" 'crm cluster run "crm cluster stop"' echo "==== SUSE Cluster Stopped ====" -ssh "$node01" 'su - '"$sidadm"' -c "HDB start"' & -ssh "$node02" 'su - '"$sidadm"' -c "HDB start"' & -wait +ssh "$node01" 'su - '"$sidadm"' -c "sapcontrol -nr '$instNr' -function StartSystem"' +ssh "$node02" 'su - '"$sidadm"' -c "sapcontrol -nr '$instNr' -function StartSystem"' +ssh "$node01" 'su - '"$sidadm"' -c "sapcontrol -nr '$instNr' -function WaitforStarted 300 10"' +ssh "$node02" 'su - '"$sidadm"' -c "sapcontrol -nr '$instNr' -function WaitforStarted 300 10"' + +while true; do + ssh "$currPrimary" 'su - '"$sidadm"' -c "cdpy; python3 systemReplicationStatus.py 1>/dev/null"'; rc=$? + if [[ "$rc" != 15 ]]; then + sleep 60 + else + break + fi +done echo "==== SAP HANA DBs started ====" From 6cc94c7e6c280545de91a913a9547cec48af8056 Mon Sep 17 00:00:00 2001 From: lpinne Date: Thu, 17 Aug 2023 09:24:21 +0200 Subject: [PATCH 12/48] ocf_suse_SAPHanaFilesystem.7: typo in examples --- man/ocf_suse_SAPHanaFilesystem.7 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/ocf_suse_SAPHanaFilesystem.7 b/man/ocf_suse_SAPHanaFilesystem.7 index a24c8b88..000216c2 100644 --- a/man/ocf_suse_SAPHanaFilesystem.7 +++ b/man/ocf_suse_SAPHanaFilesystem.7 @@ -249,7 +249,7 @@ clone cln_SAPHanaFil_SLE_HDB00 rsc_SAPHanaFil_SLE_HDB00 \\ .br meta clone-node-max="1" notify="true" interleave="true" .PP -location SAPHanaFil_not_on_majority_maker cln_SAPHanaFIL_SLE_HDB00 -inf: vm-majority +location SAPHanaFil_not_on_majority_maker cln_SAPHanaFil_SLE_HDB00 -inf: vm-majority .RE .PP * Example on showing the current SAPHanaFilesystem rescource configuration on scale-out. From 8693be2e15d8bb509eed2948274412ea1c7c94d3 Mon Sep 17 00:00:00 2001 From: lpinne Date: Thu, 17 Aug 2023 10:33:58 +0200 Subject: [PATCH 13/48] ocf_suse_SAPHanaFilesystem.7 --- man/ocf_suse_SAPHanaFilesystem.7 | 37 ++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/man/ocf_suse_SAPHanaFilesystem.7 b/man/ocf_suse_SAPHanaFilesystem.7 index 000216c2..61f40bc0 100644 --- a/man/ocf_suse_SAPHanaFilesystem.7 +++ b/man/ocf_suse_SAPHanaFilesystem.7 @@ -311,7 +311,7 @@ for the NFS server in use. See manual pages nfs(5) and fstab(5) for details. 
 nfs1:/export/SLE/shared/ /hana/shared/SLE/ auto defaults,rw,hard,proto=tcp,intr,noatime,vers=4,lock 0 0
 .RE
 .PP
-* Example for temporarily blocking HANA filesystems.
+* Example for temporarily blocking HANA access to local filesystems.
 .PP
 This could be done for testing the SAPHanaFilesystem RA integration.
 Blocking the HANA filesystem is dangerous. This test should not be done on production
@@ -340,6 +340,37 @@ Note: Understand the impact before trying.
 5. Check HANA and Linux cluster for clean idle state.
 .RE
 .PP
+* Example for temporarily blocking HANA access to NFS filesystems.
+.PP
+This could be done for testing the SAPHanaFilesystem RA integration.
+Blocking the HANA filesystem is dangerous. This test should not be done on production
+systems.
+The TCP port used is 2049. See also SUSE TID 7000524.
+.br
+Note: Understand the impact before trying.
+.PP
+.RS 2
+1. Check HANA and Linux cluster for clean idle state.
+.PP
+2. On secondary, block /hana/shared/SLE/ filesystem.
+.RS 2
+# sync /hana/shared/SLE/
+.br
+# iptables -I OUTPUT -p tcp -m multiport --ports 2049 -j ACCEPT
+.br
+Note: The ACCEPT needs to be replaced by an appropriate action.
+.RE
+.PP
+3. Check system log for SAPHanaFilesystem entries.
+.PP
+4. On secondary, unblock /hana/shared/SLE/ filesystem.
+.RS 2
+# iptables -D OUTPUT -p tcp -m multiport --ports 2049 -j ACCEPT
+.RE
+.PP
+5. Check HANA and Linux cluster for clean idle state.
+.RE
+.PP
 .\"
 .SH FILES
 .TP
@@ -409,7 +440,9 @@ Please report any other feedback and suggestions to feedback@suse.com.
 .br
 https://documentation.suse.com/sbp/sap/ ,
 .br
-https://www.suse.com/support/kb/doc/?id=000019904
+https://www.suse.com/support/kb/doc/?id=000019904 ,
+.br
+https://www.suse.com/support/kb/doc/?id=000016649
 .PP
 .\"
 .SH AUTHORS

From 623f299b42e61b49fc259e23e6d2f887d782f14d Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Thu, 17 Aug 2023 12:04:27 +0200
Subject: [PATCH 14/48] angi: tester: test_restart_cluster_turn_hana - wait for SR to stabilize the takeover with primary suspend

---
 test/test_restart_cluster_turn_hana | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/test/test_restart_cluster_turn_hana b/test/test_restart_cluster_turn_hana
index 66ccb2d4..07d8821e 100755
--- a/test/test_restart_cluster_turn_hana
+++ b/test/test_restart_cluster_turn_hana
@@ -21,6 +21,15 @@
 ssh "${node01}" "crm resource cleanup $mstResource"

 currPrimary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=script" | awk -F'/' '/score="150"/ { print $2 }' )"
+currPrimary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=script" | awk -F'/' '/score="150"/ { print $2 }' )"
+currSecondary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=script" | awk -F'/' '/score="100"/ { print $2 }' )"
+
+echo "p=$currPrimary, s=$currSecondary"
+
+sitePrimary=$(ssh "${node01}" "SAPHanaSR-showAttr --format=script" | awk -F'=' '$0 ~ node".site" { print $2 }' node="$currPrimary")
+sitePrimary="${sitePrimary//\"/}"
+
+echo "p=$currPrimary ($sitePrimary), s=$currSecondary"
 currSecondary="$(ssh "${node01}" "SAPHanaSR-showAttr --format=script" | awk -F'/' '/score="100"/ { print $2 }' )"

 echo "p=$currPrimary, s=$currSecondary"

From 08856ba37720e315bc98a26b033120e12e4ced73 Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Thu, 17 Aug 2023 12:06:02 +0200
Subject: [PATCH 15/48] angi: package: SAPHanaSR-tester.spec - updated version to reflect progress in the tester

---
 SAPHanaSR-tester.spec | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/SAPHanaSR-tester.spec b/SAPHanaSR-tester.spec
index 9a2e854c..abb3d3c1 100644 --- a/SAPHanaSR-tester.spec +++ b/SAPHanaSR-tester.spec @@ -19,7 +19,7 @@ License: GPL-2.0 Group: Productivity/Clustering/HA AutoReqProv: on Summary: Test suite for SAPHanaSR clusters -Version: 1.1.0 +Version: 1.2.0 Release: 0 Url: https://www.suse.com/c/fail-safe-operation-of-sap-hana-suse-extends-its-high-availability-solution/ From e692be53acfb6a227acb515842a666cc38c449eb Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Fri, 18 Aug 2023 12:48:10 +0200 Subject: [PATCH 16/48] angi: tester: SAPHanaSR-testCluster-multiNode, saphana_sr_test.py, properties.json - auto-generate ".test_properties" used by shell test scipts --- test/SAPHanaSR-testCluster-multiNode | 25 +++++++++++++++++++++++++ test/json/angi-ScaleOut/properties.json | 1 + test/json/angi-ScaleUp/properties.json | 4 +++- test/saphana_sr_test.py | 20 +++++++++++++++++++- 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/test/SAPHanaSR-testCluster-multiNode b/test/SAPHanaSR-testCluster-multiNode index 4b258f43..2a8c2b8c 100755 --- a/test/SAPHanaSR-testCluster-multiNode +++ b/test/SAPHanaSR-testCluster-multiNode @@ -38,6 +38,8 @@ parser.add_argument("--repeat", help="how often to repeat the test") parser.add_argument("--dumpFailures", help="print failed checks per loop", action="store_true") parser.add_argument("--logFile", help="log file to write the messages") +parser.add_argument("--printTestProperties", help="print test properties and exit", + action="store_true") args = parser.parse_args() if args.testFile: test01.message("PARAM: testFile: {}".format(args.testFile)) @@ -65,6 +67,9 @@ if args.logFile: # disable 'consider to use with ...' - I am pretty sure with does not match here # pylint: disable-next=R1732 test01.run['log_file_handle'] = open(test01.config['log_file'], 'a', encoding="utf-8") +if args.printTestProperties: + test01.config['printTestProperties'] = args.printTestProperties + test01.message(f"PARAM: printTestProperties: {test01.config['printTestProperties']}") while test01.run['count'] <= test01.config['repeat']: test01.run['r_id'] = random.randrange(10000,99999,1) @@ -115,7 +120,27 @@ while test01.run['count'] <= test01.config['repeat']: f" sWorker={l_top['sWorker']}" ) test01.message(l_msg) + test01.read_test_file() + ### debug exit after printing test properties + if test01.config['printTestProperties'] is True: + p_msg = ( + f"test_properties:" + f" node01={l_top.get('pHost','node01')}" + f" node02={l_top.get('sHost','node02')}" + f" mstResource={test01.test_data.get('mstResource','')}" + f" clnResource={test01.test_data.get('clnResource','')}" + f" srMode=sync" + f" opMode=logreplay" + f" SID={test01.test_data.get('sid','C11')}" + f" instNr={test01.test_data.get('instNo','00')}" + f" sidadm={test01.test_data.get('sid','C11').lower()}adm" + f" userkey={test01.test_data.get('userKey','')}" + ) + test01.message(p_msg) + break + test01.write_test_properties(l_top) + my_test_id = test01.run['test_id'] if test01.config['repeat'] != 1: test01.message("TEST: {} testNr={} ######".format(my_test_id, test01.run['count'])) diff --git a/test/json/angi-ScaleOut/properties.json b/test/json/angi-ScaleOut/properties.json index 8da3b39e..7f71d029 100644 --- a/test/json/angi-ScaleOut/properties.json +++ b/test/json/angi-ScaleOut/properties.json @@ -1,5 +1,6 @@ { "sid": "HA1", + "instNo": "10", "mstResource": "mst_SAPHanaCon_HA1_HDB10", "clnResource": "cln_SAPHanaTop_HA1_HDB10" } diff --git a/test/json/angi-ScaleUp/properties.json b/test/json/angi-ScaleUp/properties.json index 
0d53d49f..37d8a8ba 100644 --- a/test/json/angi-ScaleUp/properties.json +++ b/test/json/angi-ScaleUp/properties.json @@ -1,5 +1,7 @@ { "sid": "HA1", + "instNo": "00", "mstResource": "mst_SAPHanaCon_HA1_HDB00", - "clnResource": "cln_SAPHanaTop_HA1_HDB00" + "clnResource": "cln_SAPHanaTop_HA1_HDB00", + "userKey": "TESTER" } diff --git a/test/saphana_sr_test.py b/test/saphana_sr_test.py index 400939ad..23a9896c 100755 --- a/test/saphana_sr_test.py +++ b/test/saphana_sr_test.py @@ -61,7 +61,8 @@ def __init__(self, *args, **kwargs): 'repeat': 1, 'dump_failures': False, 'remote_node': None, - 'remote_nodes': [] + 'remote_nodes': [], + 'printTestProperties': False } self.dict_sr = {} self.test_data = {} @@ -253,20 +254,37 @@ def pretty_print(self, dictionary,level): def read_test_file(self): """ read Test Description, optionally defaultchecks and properties """ if self.config['properties_file']: + print(f"read properties file {self.config['properties_file']}") with open(self.config['properties_file'], encoding="utf-8") as prop_fh: self.test_data.update(json.load(prop_fh)) if self.config['defaults_checks_file']: + print(f"read defaults file {self.config['defaults_checks_file']}") with open(self.config['defaults_checks_file'], encoding="utf-8") as dc_fh: self.test_data.update(json.load(dc_fh)) if self.config['test_file'] == "-": self.test_data.update(json.load(sys.stdin)) else: with open(self.config['test_file'], encoding="utf-8") as tf_fh: + print(f"read test file {self.config['test_file']}") self.test_data.update(json.load(tf_fh)) self.run['test_id'] = self.test_data['test'] self.message("DEBUG: test_data: {}".format(str(self.test_data)), stdout=False) + def write_test_properties(self, l_top): + with open(".test_properties", 'w', encoding="utf-8") as test_prop_fh: + test_prop_fh.write(f"node01={l_top.get('pHost','node01')}\n") + test_prop_fh.write(f"node02={l_top.get('sHost','node02')}\n") + test_prop_fh.write(f"mstResource={self.test_data.get('mstResource','')}\n") + test_prop_fh.write(f"clnResource={self.test_data.get('clnResource','')}\n") + test_prop_fh.write(f"srMode=sync\n") + test_prop_fh.write(f"opMode=logreplay\n") + test_prop_fh.write(f"SID={self.test_data.get('sid','C11')}\n") + test_prop_fh.write(f"instNr={self.test_data.get('instNo','00')}\n") + test_prop_fh.write(f"sidadm={self.test_data.get('sid','C11').lower()}adm\n") + test_prop_fh.write(f"userkey={self.test_data.get('userKey','')}\n") + test_prop_fh.flush() + def __add_failed__(self, area_object, key_val_reg): """ document failed checks """ if 'failed' in self.run: From 9f06098a780e1f053e85fc1d8b739096207a97a2 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Tue, 22 Aug 2023 07:56:53 +0200 Subject: [PATCH 17/48] angi: package: ra/saphana-common-lib, ra/saphana-filesystem-lib - avoid using own files in /tmp --- ra/saphana-common-lib | 6 +++--- ra/saphana-filesystem-lib | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ra/saphana-common-lib b/ra/saphana-common-lib index 8e1f3cc5..97ad9c99 100755 --- a/ra/saphana-common-lib +++ b/ra/saphana-common-lib @@ -420,9 +420,9 @@ function HANA_CALL() { * ) errExt=$(date '+%s%N')_${sid}adm # TODO PRIO1: NG - need to differ files for Topology and Controller - su_err_log="/tmp/HANA_CALL_SU_${raType}${errExt}" - cmd_out_log="/tmp/HANA_CALL_CMD_${raType}OUT_${errExt}" - cmd_err_log="/tmp/HANA_CALL_CMD_${raType}ERR_${errExt}" + su_err_log="/run/HANA_CALL_SU_${raType}${errExt}" + cmd_out_log="/run/HANA_CALL_CMD_${raType}OUT_${errExt}" + 
cmd_err_log="/run/HANA_CALL_CMD_${raType}ERR_${errExt}" # TODO PRIO2: NG - ScaleUp had 'timeout "$timeOut" $pre_cmd "($pre_script; $cmd > $cmd_out_log)' # 'output=$(timeout "$timeOut" $pre_cmd "($pre_script; $cmd > $cmd_out_log) >& $cmd_err_log" 2>"$su_err_log"); rc=$? diff --git a/ra/saphana-filesystem-lib b/ra/saphana-filesystem-lib index 3e1cd28a..0c630ef8 100755 --- a/ra/saphana-filesystem-lib +++ b/ra/saphana-filesystem-lib @@ -252,8 +252,8 @@ function shfs_reset_poison_pill() { } function shfs_error_simulation() { - if [ -e "/tmp/break_SAPHanaFilesystem_${SID}" ]; then - rm "/tmp/break_SAPHanaFilesystem_${SID}" + if [ -e "/run/break_SAPHanaFilesystem_${SID}" ]; then + rm "/run/break_SAPHanaFilesystem_${SID}" return 0 else return 1 From df72d76e0d805bcdd0e13e7b97adb65c033fc61a Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Tue, 22 Aug 2023 11:33:34 +0200 Subject: [PATCH 18/48] angi: performance: saphana-common-lib - try to eliminate forking awk where possible - in first step keep old code to compare the results --- ra/saphana-common-lib | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/ra/saphana-common-lib b/ra/saphana-common-lib index 97ad9c99..cc4b12ce 100755 --- a/ra/saphana-common-lib +++ b/ra/saphana-common-lib @@ -559,12 +559,22 @@ function get_local_sr_config() { gP ) # call getParameter (gP) local gpKeys="" gpKeys=$(echo --key=global.ini/system_replication/{actual_mode,mode,site_name,site_id}) - hdbANSWER=$(HANA_CALL --timeout "$HANA_CALL_TIMEOUT" --cmd "python getParameter.py $gpKeys --sapcontrol=1" 2>&1 | awk -F/ 'BEGIN {out=0} /^SAPCONTROL-OK: / { out=1 } /^SAPCONTROL-OK: / { out=0 } /=/ {if (out==1) {print $3} }') - gSrMode=$(echo "$hdbANSWER" | awk -F= '$1=="actual_mode" {print $2}') + hdbANSWER_OLD=$(HANA_CALL --timeout "$HANA_CALL_TIMEOUT" --cmd "python getParameter.py $gpKeys --sapcontrol=1" 2>&1 | awk -F/ 'BEGIN {out=0} /^SAPCONTROL-OK: / { out=1 } /^SAPCONTROL-OK: / { out=0 } /=/ {if (out==1) {print $3} }') + hdbANSWER=$(HANA_CALL --timeout "$HANA_CALL_TIMEOUT" --cmd "python getParameter.py $gpKeys --sapcontrol=1" 2>&1) + [[ "$hdbANSWER" =~ "SAPCONTROL-OK: "(.*)"SAPCONTROL-OK: " ]] && hdbANSWER="${BASH_REMATCH[1]}" || hdbANSWER="" + # TODO: delete gSrModeOLD after checking, that gSrModeOLD == gSrMode (new method) + gSrModeOLD=$(echo "$hdbANSWER_OLD" | awk -F= '$1=="actual_mode" {print $2}') + [[ "$hdbANSWER" =~ "/actual_mode"=([^$'\n']+) ]] && gSrMode=${BASH_REMATCH[1]} + # TODO: delete gSrModeOLD after checking, that gSrModeOLD == gSrMode (new method) + if [ -z "$gSrModeOLD" ]; then + gSrModeOLD=$(echo "$hdbANSWER_OLD" | awk -F= '$1=="mode" {print $2}') + fi # if 'actual_mode' is not available, fallback to 'mode' if [ -z "$gSrMode" ]; then - gSrMode=$(echo "$hdbANSWER" | awk -F= '$1=="mode" {print $2}') + [[ "$hdbANSWER" =~ "/mode="([^$'\n']+) ]] && gSrMode=${BASH_REMATCH[1]} fi + # TODO: delete gSrModeOLD after checking, that gSrModeOLD == gSrMode (new method) + super_ocf_log info "ACT: gSrModeOLD=$gSrModeOLD; gSrMode=$gSrMode" super_ocf_log info "ACT: hdbnsutil not answering - using global.ini as fallback - srmode=$gSrMode" ;; hU | * ) # call hdbnsUtil (hU) ( also for unknown chkMethod ) From c9b113b6b0384a616b9558f645b37e3a82508bbf Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Tue, 22 Aug 2023 19:54:16 +0200 Subject: [PATCH 19/48] angi: doc: SAPHanaSR-showAttr-adoc.8- add a adoc test file --- man/SAPHanaSR-showAttr-adoc.8 | 55 +++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 
man/SAPHanaSR-showAttr-adoc.8

diff --git a/man/SAPHanaSR-showAttr-adoc.8 b/man/SAPHanaSR-showAttr-adoc.8
new file mode 100644
index 00000000..03538c35
--- /dev/null
+++ b/man/SAPHanaSR-showAttr-adoc.8
@@ -0,0 +1,55 @@
+= SAPHanaSR-showAttr-adoc(1)
+Lars Pinne, Fabian Herschel
+v1.001
+:doctype: manpage
+:manmanual: SAPHanaSR-showAttr-adoc
+:mansource: SAPHanaSR-showAttr-adoc
+:man-linkstyle: pass:[blue R < >]
+
+== Name
+
+SAPHanaSR-showAttr - Shows Linux cluster attributes for SAP HANA system replication.
+
+== Synopsis
+
+SAPHanaSR-showAttr [ --help | --version | --path2table ]
+
+SAPHanaSR-showAttr [ --sid=SID[:INO] ] [ --select=SELECTION ] [ --sort=FIELD ] [ --format=FORMAT ] [ --cib=OFFLINE_CIB_FILE ]
+
+== Description
+
+SAPHanaSR-showAttr shows Linux cluster attributes for SAP HANA system replication automation.
+The overall system replication (SR) state is shown as well as the HANA state
+on each node.
+Because the HANA srHook methods srConnectionChanged() and preTakeover() are
+used, respective information shows up as well.
+The information is fetched from the Linux cluster information base (CIB), not
+from HANA directly.
+Fields to be shown can be specified by pre-defined selections via command line option.
+
+The output shows four sections, containing all or some of the listed
+fields:
+
+Global section
+
+*global (Global)*::
+  constant
+
+*cib-time*::
+  date and time of record
+
+== Exit status
+
+TODO
+
+*0*::
+  TODO 0
+
+*1*::
+  TODO 1
+
+== Resources
+
+== Copying
+
+Copyright (C) 2014 {author}.

From 599d1e9009e8ac5267d744988ba530199c469e92 Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Tue, 22 Aug 2023 19:56:22 +0200
Subject: [PATCH 20/48] moving test file (test only)

---
 man/{SAPHanaSR-showAttr-adoc.8 => SAPHanaSR-showAttr-adoc.8.adoc} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename man/{SAPHanaSR-showAttr-adoc.8 => SAPHanaSR-showAttr-adoc.8.adoc} (100%)

diff --git a/man/SAPHanaSR-showAttr-adoc.8 b/man/SAPHanaSR-showAttr-adoc.8.adoc
similarity index 100%
rename from man/SAPHanaSR-showAttr-adoc.8
rename to man/SAPHanaSR-showAttr-adoc.8.adoc

From 5ede9d56a218ee2c1dc5ee3b48dc4c6dc8dc4bce Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Tue, 22 Aug 2023 20:08:05 +0200
Subject: [PATCH 21/48] adoc test file only

---
 man/vc_SAPHanaSR-showAttr.adoc | 463 +++++++++++++++++++++++++++++++++
 1 file changed, 463 insertions(+)
 create mode 100644 man/vc_SAPHanaSR-showAttr.adoc

diff --git a/man/vc_SAPHanaSR-showAttr.adoc b/man/vc_SAPHanaSR-showAttr.adoc
new file mode 100644
index 00000000..94186b90
--- /dev/null
+++ b/man/vc_SAPHanaSR-showAttr.adoc
@@ -0,0 +1,463 @@
= SAPHanaSR-showAttr
08 May 2023

== NAME

SAPHanaSR-showAttr - Shows Linux cluster attributes for SAP HANA system replication.

== SYNOPSIS

*SAPHanaSR-showAttr* [ --help | --version | --path2table ]

*SAPHanaSR-showAttr* [ --sid=SID[:INO] ] [ --select=SELECTION ] [ --sort=FIELD ] [ --format=FORMAT ] [ --cib=OFFLINE_CIB_FILE ]

== DESCRIPTION

SAPHanaSR-showAttr shows Linux cluster attributes for SAP HANA system replication automation. The overall system replication (SR) state is shown as well as the HANA state on each node. Because the HANA srHook methods srConnectionChanged() and preTakeover() are used, respective information shows up as well. The information is fetched from the Linux cluster information base (CIB), not from HANA directly. Fields to be shown can be specified by pre-defined selections via command line option.
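For instance, combining a pre-defined selection with the script format yields a compact, parseable view of just the system replication fields. A minimal sketch (the option values are described under OPTIONS below; the actual output depends on the cluster):

----
# SAPHanaSR-showAttr --select=sr --format=script
----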
The output shows four sections, containing all or some of the listed fields:

Global section::
global (*Global*) +
date and time of record (*cib-time*) +
maintenance status of Linux cluster (*maintenance*) +
filter for resource agent logging (*filter*) +
HANA primary replication site (*prim*) +
HANA secondary replication site (*sec*) +
HANA system identifier (*sid*) +
HANA topology (*topology*)

Resources section::
name of Linux cluster resources (*Resources*) +
maintenance status of Linux cluster resources (*maintenance*)

Sites section::
HANA site name (*Sites*) +
SAPHanaSR last primary timestamp (*lpt*) +
HANA landscape status (*lss*) +
HANA current master nameserver for that site (*mns*) +
HANA replication operation mode (*opMode*) +
HANA system replication status from HA/DR provider hook (*srHook*) +
HANA system replication mode (*srMode*) +
HANA system replication status from RA monitor (*srPoll*) +
HANA system replication role (*srr*)

Hosts section::
Linux hostname (*Hosts*) +
state of the Linux cluster resource (*clone_state*) +
state of Linux cluster node (*node_state*) +
actual and configured roles for HANA on that node (*roles*) +
actual master score on that node (*score*) +
HANA site where the host sits (*site*) +
maintenance state of Linux cluster node (*standby*) +
HANA system replication takeover action, indicated by preTakeover() (*sra*) +
HANA system replication takeover action history (*srah*) +
Linux cluster node fence status (*terminate*) +
HANA version (*version*) +
HANA virtual host name (*vhost*)

Some fields are generated dynamically from the CIB. Those fields are missing if no corresponding attribute exists, e.g. the standby field. An incomplete compilation of field details and purpose follows below.

*Global* - the string global

Value: global

The Global field is used as root for the whole data structure. The Global section shows overall information on the system.

*Hosts* - Linux hostname

Value: [ _HOSTNAME_ ]

Hostnames of HANA nodes managed by the Linux cluster. The hostname has to follow UNIX RFC 1178. Additional rules specific to SAP apply. E.g. length of hostname is limited to 13 characters. See also the fields remoteHost and vhost. The Hosts section shows one line per host, containing details on that host and the HANA instance there.

*Resources* - name of Linux cluster resources

Value: [ _RESOURCENAME_ ]

Name of the multi-state resource controlled by SAPHanaController, occasionally also the clone resource SAPHanaTopology. The Resources section shows the HANA cluster resources and their cluster maintenance attribute's status.

*Sites* - HANA site

Value: [ _HANA_SITE_ ]

The HANA sites are detected by SAPHanaTopology. Two sites are managed by the Linux cluster. Optionally a third site could be connected thru multi-target system replication outside the cluster. The Sites section shows information on the sites known to the Linux cluster and the HANA system replication status.

*clone_state* - clone state of the Linux cluster resource

Value: [ PROMOTED | DEMOTED | UNDEFINED | WAITING4LPA | WAITING4PRIM | WAITING4NODES | WAITING4REG ]

This variable is set by the SAPHanaController resource agent.

PROMOTED marks on scale-up the master state of an instance, which makes it an HANA SR primary. On scale-out it indicates the master nameserver of the primary site.

DEMOTED makes on scale-up an HANA SR secondary.
On scale-out all running nodes except the primary master nameserver are marked as DEMOTED. The DEMOTED state can be transient, on the way to promoting.

WAITING4LPA indicates potential risk of a dual primary situation. The cluster is waiting for DUPLICATE_PRIMARY_TIMEOUT seconds before registering the former HANA primary as new secondary.

WAITING4PRIM indicates the cluster is waiting for the HANA primary becoming ready in order to register the HANA secondary.

WAITING4NODES indicates on scale-out that not enough nodes are available to bring up a functional site, according to the HANA landscape configuration.

WAITING4REG indicates potential risk of a dual primary situation. The cluster is waiting for the administrator to manually register the former HANA primary as new secondary.

See also AUTOMATED_REGISTER and DUPLICATE_PRIMARY_TIMEOUT in ocf_suse_SAPHanaController(7).

*maintenance* - maintenance status of Linux cluster or cluster resource

Value: [ true | false ]

This is a Linux cluster node attribute. It is set by an admin. The attribute is shown after it has been changed from the default. The field might appear or disappear, depending on cluster maintenance tasks. See also the standby attribute below.

*filter* - SAPHanaController filter for logging

Value: [ _filter_ ]

The filter is defined by hana_<sid>_glob_filter. See ocf_suse_SAPHanaController(7) for details.

*lpt* - last primary timestamp

Value: [ _UNIX_EPOCH_ | 30 | 20 | 10 | 0 ]

The node attribute lpa_<sid>_lpt or cluster attribute hana_<sid>_site_lpt_<SITE> is set by the SAPHanaController resource agent. When the RA detects a running HANA SR primary on the node, it records the Unix Epoch time. See section DUPLICATE_PRIMARY_TIMEOUT in ocf_suse_SAPHanaController(7).

Auxiliary values are set for an HANA SR secondary. 30 means the instance is evictable for primary. 10 means: not yet ready.

*lss* - landscape status

Value: [ 4 | 3 | 2 | 1 | 0 ]

The site attribute lss shows the return code of HANA's landscapeHostConfiguration.py. The parameter does not tell you if the secondary system is ready for a takeover. The meaning is different from common Linux return codes.

4 = OK - Everything looks perfect on the HANA primary. +
3 = WARNING - A HANA Host Auto-Failover is taking place. +
2 = INFO - The landscape is completely functional, but the actual role of the host differs from the configured role. +
1 = DOWN - There are not enough active hosts. +
0 = FATAL - Internal script error, the state could not be determined.

See landscapeHostConfiguration.py and ocf_suse_SAPHanaController(7).
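A quick way to see this return code outside the cluster is to call the script manually and inspect the shell's exit status. A minimal sketch, assuming the example SID HA1 and the cdpy alias of the <sid>adm environment, as used by the test scripts earlier in this patch series; run it on the current primary master nameserver:

----
# su - ha1adm -c "cdpy; python3 landscapeHostConfiguration.py"; echo rc=$?
----

On a healthy primary this should report rc=4, matching lss=4 in the Sites section.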
*mns* - master nameserver

Value: [ _HANA_MASTERNAMESERVER_ ]

The cluster attribute hana_<sid>_site_mns_<SITE> shows the current HANA master nameserver for the given site. It is empty for sites outside the Linux cluster.

*srr* - system replication role

Value: [ "P"rimary | "S"econdary | "N"one ]

The site attribute srr shows the current HANA system replication role for the given scale-out site. It is empty for sites outside the Linux cluster. For scale-up see roles sub-field B of the Hosts section.

*node_state* - state of the Linux cluster node

Value: [ online | offline ]

*opMode* - HANA SR operations mode

Value: [ logreplay | delta_datashipping | logreplay_readaccess ]

The node attribute hana_<sid>_op_mode is set by SAPHanaTopology, according to the running HANA. The attribute is used by the SAPHanaController resource agent for setting up system replication. delta_datashipping is not recommended in the context of Linux clusters.

*remoteHost* - HANA SR remote host

Value: [ _HOSTNAME_ ]

The node attribute hana_<sid>_remoteHost is set by SAPHanaTopology, according to the running HANA. The attribute is used by the SAPHanaController resource agent for setting up system replication. See also the fields Hosts and vhost.

*roles* - actual and configured roles for HANA on that node

The roles field in the Hosts section has four sub-fields (A:B:C:D). For scale-up these four sub-fields should be master1:master:worker:master.

Field A: NameServer Config Role +
Value: [ master1 | master2 | master3 | worker | slave | standby | shtdown ]

Field B: NameServer Actual Role +
Value: [ master | slave | standby | shtdown ]

Field C: IndexServer Config Role +
Value: [ master1 | master2 | master3 | worker | slave | standby | shtdown ]

Field D: IndexServer Actual Role +
Value: [ master | worker | slave | standby | shtdown ]

*score* - actual master score on that node

Value: [ 150 | 145 | 140 | 115 | 110 | 100 | 90 | 80 | 70 | 60 | 10 | 5 | 0 | -1 | -9000 | -10000 | -12200 | -22100 | -22200 | -32300 | -33333 | -INFINITY ]

This is a variable of the SAPHanaController resource agent. It is calculated based on an internal scoring table. A value of 150 should cause the Linux cluster to promote the local resource instance to HANA SR primary master nameserver. 140 indicates a HANA primary master nameserver candidate. 100 indicates the HANA secondary master nameserver. This field should not be empty.

Note: The effective resource scoring used by the Linux cluster differs from the above values because the cluster engine takes into account other factors as well.

*sid* - HANA system identifier

Value: [ _SID_ ]

The SID is the same for the pair of HANA system replication databases in the Linux cluster. Also an HANA database connected thru multi-target replication outside the cluster has this SID.

*site* - HANA site where the host sits

Value: [ _HANA_SITE_ ]

The node attribute hana_<sid>_site is set by SAPHanaTopology, according to the running HANA. The attribute is used by the SAPHanaController resource agent for setting up system replication. A dash (-) indicates the RA did not run or did not recognize the site.

*srMode* - HANA SR mode

Value: [ sync | syncmem ]

The node attribute hana_<sid>_glob_srmode is set by SAPHanaTopology, according to the running HANA. The attribute is used by the SAPHanaController resource agent for setting up system replication. SAP HANA knows also async and fullsync (see URLs below). Those do not make sense for automating HANA system replication by a Linux cluster.

*standby* - maintenance state of Linux cluster node

Value: [ on | off ]

This is a Linux cluster node attribute. It is set by an admin. The attribute is shown after it has been changed from the default. The field might appear or disappear, depending on cluster maintenance tasks. See also the maintenance attribute above.

*srPoll* - HANA SR status

Value: [ SOK | SFAIL | SWAIT | SREG | PRIM ]

The cluster property hana_<sid>_glob_sync_state is set by the SAPHanaController resource agent. The first three values represent an HANA system replication status, recognized at the latest RA run, see ocf_suse_SAPHanaController(7) and systemReplicationStatus.py. The 4th value (PRIM) just indicates an HANA SR primary.

*sra* - HANA system replication action

Value: [ T | R | - ]

The node attribute system replication action is checked by the HA/DR provider susTkOver.py using the API method preTakeover(). It is set by the SAPHanaController resource agent. It indicates whether a takeover or registration is ongoing. This attribute may not be persisted in pengine files.

T = Takeover on new primary (sr_takeover) ongoing.

R = Registration on new secondary (sr_register) ongoing.

- = No action pending.

*srah* - HANA system replication action history

Value: [ T | R | - ]

The node attribute system replication action history stores actions in CIB attributes for later use, for root cause analysis.

*srHook* - HANA replication channel state, indicated by srConnectionChanged

Value: [ SOK | SFAIL | SWAIT | SREG | PRIM ]

The cluster attribute related to srHook is hana_<sid>_site_srHook_<SITE>. It represents the HANA SR status from HA/DR provider API method srConnectionChanged(). See SAPHanaSR(7) and SAPHanaSR-ScaleOut(7) for supported API versions and scenarios. The attribute is not updated if the cluster is not running. Thus if the cluster is shut down while HANA remains running, the content of srHook might be outdated on cluster start until the next srConnectionChanged() event. See susHanaSR.py(7) and susHanaSrMultiTarget.py(7) for details.

*terminate* - Linux cluster node fence status

Value: [ true ]

Indicates whether that node is requested for being fenced from outside the Linux cluster. The attribute is removed as soon as the node has been successfully fenced.
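Such node attributes can also be read individually with crm_attribute. The call below follows the same pattern as the susChkSrv.py(7) example earlier in this patch series; the node name node2 is an example:

----
# crm_attribute -t status -N 'node2' -G -n terminate
----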
+*mns* - master nameserver
+
+Value: [ _HANA_MASTERNAMESERVER_ ]
+
+The cluster attribute hana_<sid>_site_mns_<site> shows the current
+HANA master nameserver for the given site. It is empty for sites
+outside the Linux cluster.
+
+*srr* - system replication role
+
+Value: [ "P"rimary | "S"econdary | "N"one ]
+
+The site attribute srr shows the current HANA system replication role
+for the given scale-out site. It is empty for sites outside the Linux
+cluster. For scale-up see roles sub-field B of the Hosts section.
+
+*node_state* - state of the Linux cluster node
+
+Value: [ online | offline ]
+
+*opMode* - HANA SR operations mode
+
+Value: [ logreplay | delta_datashipping | logreplay_readaccess ]
+
+The node attribute hana_<sid>_op_mode is set by SAPHanaTopology,
+according to the running HANA. The attribute is used by the
+SAPHanaController resource agent for setting up system replication.
+delta_datashipping is not recommended in the context of Linux
+clusters.
+
+*remoteHost* - HANA SR remote host
+
+Value: [ _HOSTNAME_ ]
+
+The node attribute hana_<sid>_remoteHost is set by SAPHanaTopology,
+according to the running HANA. The attribute is used by the
+SAPHanaController resource agent for setting up system replication.
+See also the fields Hosts and vhost.
+
+*roles* - actual and configured roles for HANA on that node
+
+The roles field in the Hosts section has four sub-fields (A:B:C:D).
+For scale-up these four sub-fields should be
+master1:master:worker:master.
+
+Field A: NameServer Config Role
+
+Value: [ master1 | master2 | master3 | worker | slave | standby |
+shtdown ]
+
+Field B: NameServer Actual Role
+
+Value: [ master | slave | standby | shtdown ]
+
+Field C: IndexServer Config Role
+
+Value: [ master1 | master2 | master3 | worker | slave | standby |
+shtdown ]
+
+Field D: IndexServer Actual Role
+
+Value: [ master | worker | slave | standby | shtdown ]
+
+*score* - actual master score on that node
+
+Value: [ 150 | 145 | 140 | 115 | 110 | 100 | 90 | 80 | 70 | 60 | 10 |
+5 | 0 | -1 | -9000 | -10000 | -12200 | -22100 | -22200 | -32300 |
+-33333 | -INFINITY ]
+
+This is a variable of the SAPHanaController resource agent. It is
+calculated based on an internal scoring table. A value of 150 should
+cause the Linux cluster to promote the local resource instance to HANA
+SR primary master nameserver. 140 indicates a HANA primary master
+nameserver candidate. 100 indicates the HANA secondary master
+nameserver. This field should not be empty.
+
+Note: The effective resource scoring used by the Linux cluster differs
+from the above values because the cluster engine takes into account
+other factors as well.
+
+*sid* - HANA system identifier
+
+Value: [ _SID_ ]
+
+The SID is the same for the pair of HANA system replication databases
+in the Linux cluster. Also an HANA database connected through
+multi-target replication outside the cluster has this SID.
+
+*site* - HANA site where the host sits
+
+Value: [ _HANA_SITE_ ]
+
+The node attribute hana_<sid>_site is set by SAPHanaTopology,
+according to the running HANA. The attribute is used by the
+SAPHanaController resource agent for setting up system replication. A
+dash (-) indicates the RA did not run or did not recognize the site.
+
+*srMode* - HANA SR mode
+
+Value: [ sync | syncmem ]
+
+The node attribute hana_<sid>_glob_srmode is set by SAPHanaTopology,
+according to the running HANA. The attribute is used by the
+SAPHanaController resource agent for setting up system replication.
+SAP HANA knows also async and fullsync (see URLs below). Those do not
+make sense for automating HANA system replication by a Linux cluster.
+
+*standby* - maintenance state of Linux cluster node
+
+Value: [ on | off ]
+
+This is a Linux cluster node attribute. It is set by an admin. The
+attribute is shown after it has been changed from the default. The
+field might appear or disappear, depending on cluster maintenance
+tasks. See also the maintenance attribute above.
+
+*srPoll* - HANA SR status
+
+Value: [ SOK | SFAIL | SWAIT | SREG | PRIM ]
+
+The cluster property hana_<sid>_glob_sync_state is set by the
+SAPHanaController resource agent. The values SOK, SFAIL, SWAIT and
+SREG represent an HANA system replication status, recognized at the
+latest RA run, see ocf_suse_SAPHanaController(7) and
+systemReplicationStatus.py.
+
+The value PRIM just indicates an HANA SR primary.
+
+*sra* - HANA system replication action
+
+Value: [ T | R | - ]
+
+The node attribute system replication action is checked by the HA/DR
+provider susTkOver.py using the API method preTakeover(). It is set by
+the SAPHanaController resource agent. It indicates whether a takeover
+or registration is ongoing. This attribute may not be persisted in
+pengine files.
+
+T = Takeover on new primary (sr_takeover) ongoing.
+
+R = Registration on new secondary (sr_register) ongoing.
+
+- = No action pending.
+
+*srah* - HANA system replication action history
+
+Value: [ T | R | - ]
+
+The node attribute system replication action history stores actions in
+CIB attributes for later use, for root cause analysis.
+
+*srHook* - HANA replication channel state, indicated by
+srConnectionChanged
+
+Value: [ SOK | SFAIL | SWAIT | SREG | PRIM ]
+
+The cluster attribute related to srHook is
+hana_<sid>_site_srHook_<site>. It represents the HANA SR status from
+the HA/DR provider API method srConnectionChanged(). See SAPHanaSR(7)
+and SAPHanaSR-ScaleOut(7) for supported API versions and scenarios.
+The attribute is not updated if the cluster is not running. Thus if
+the cluster is shut down while HANA remains running, the content of
+srHook might be outdated on cluster start until the next
+srConnectionChanged() event. See susHanaSR.py(7) and
+susHanaSrMultiTarget.py(7) for details.
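+For illustration, the srHook site attribute can be queried from the
+CIB. The SID ha1 and site name ROT are examples and need to be
+adapted:
+
+# crm_attribute -t crm_config -G -n hana_ha1_site_srHook_ROT
+
+An outdated value after cluster start is possible until the next
+srConnectionChanged() event, as described above.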
+*terminate* - Linux cluster node fence status
+
+Value: [ true ]
+
+Indicates whether that node is requested for being fenced from outside
+the Linux cluster. The attribute is removed as soon as the node has
+been successfully fenced.
+
+*version* - HANA version
+
+Value: [ _HANA_VERSION_ ]
+
+Version of the HANA instance on that node. Of course, it should be
+supported for the given Linux version. It should be the same on all
+nodes, except during specific HANA upgrade procedures.
+
+*vhost* - HANA virtual hostname
+
+Value: [ _HANA_VIRT_HOSTNAME_ ]
+
+The virtual hostname is used by the HANA instance instead of the Linux
+hostname. The node attribute hana_<sid>_vhost is set by
+SAPHanaTopology, according to the running HANA. The attribute is used
+by the SAPHanaController resource agent for setting up system
+replication. See also the fields Hosts and remoteHost. SAPHanaTopology
+needs the SAPHOSTAGENT to map the local hostname to the HANA virtual
+hostname.
+
+== OPTIONS
+
+*--help*::
+  show help.
+*--version*::
+  show version.
+*--path2table*::
+  convert script-style input pipe back into normal output.
+*--select=*_SELECT_::
+  show selected information only. Allowed values: [ all | default |
+  minimal | sr ]. Default is default.
+*--sid=*_SID_[:_INO_]::
+  use SAP system ID _SID_. Should be autodetected, if there is only one
+  SAP HANA instance installed on the local cluster node. The SAP system
+  ID is a three-character alphanumeric string with a valid SAP system
+  name like SLE, HAE, FH1, C11, or P42. Optional: Use SAP instance
+  number _INO_. Should be autodetected, if there is only one SAP HANA
+  instance installed on the local cluster node. The SAP instance number
+  must be represented by a two-digit number like 00, 05 or 42. Some
+  numbers are not allowed, e.g. 98.
+*--sort=*_FIELD_::
+  sort Hosts section table by field. Allowed values: [ roles | site ].
+  Default is sort by hostnames.
+*--format=*_FORMAT_::
+  output format. Allowed values: [ script | tables ]. Default is
+  tables.
+*--cib=*_OFFLINE_CIB_FILE_::
+  read data from given offline CIB file.
+
+== RETURN CODES
+
+*0* Successful program execution.
+
+*>0* Usage, syntax or execution errors.
+
+== EXAMPLES
+
+# SAPHanaSR-showAttr::
+  show all SAPHanaSR attributes and relevant cluster maintenance
+  states.
+  If the roles sub-fields are 1:P:::: landscapeHostConfiguration.py has
+  not been able to detect the HANA roles during the last RA monitor
+  operation. Likely HANA was down or sudo <sid>adm failed.
+# SAPHanaSR-showAttr --sort=roles::
+  show all SAPHanaSR attributes in the cluster and sort host table
+  output by roles.
+# SAPHanaSR-showAttr --sid=HA1:10
+--cib=./hb_report-17-07-2019/grauenstein01/cib.xml::
+  show all SAPHanaSR attributes for SAP System ID HA1 and instance
+  number 10 from the given CIB file.
+# SAPHanaSR-showAttr | grep -e master: -e worker: -e slave:::
+  show SAPHanaSR promotion scores on running nodes.
+# SAPHanaSR-showAttr --format=script | egrep -v
+'/(version|op_mode|vhost|remoteHost|node_state|site)=' |
+SAPHanaSR-showAttr --path2table::
+  reduce output to selected fields.
+
+== FILES
+
+/usr/bin/SAPHanaSR-showAttr::
+  the program itself.
+/usr/lib/SAPHanaSR-angi/SAPHanaSRTools.pm::
+  needed functions.
+/usr/sap/hostctrl/exe/saphostctrl::
+  the SAP host control command.
+
+== BUGS
+
+Formatting and content of this script's output will change, since this
+script is under development. This script is not intended to be called
+from monitoring tools. For monitoring please use SAPHanaSR-monitor
+instead.
+
+In case of any problem, please use your favourite SAP support process
+to open a request for the component BC-OP-LNX-SUSE. Please report any
+other feedback and suggestions to feedback@suse.com.
+
+== SEE ALSO
+
+*ocf_suse_SAPHanaController*(7) , *ocf_suse_SAPHanaTopology*(7) ,
+*SAPHanaSR-ScaleOut*(7) , *SAPHanaSR-replay-archive*(8) ,
+*SAPHanaSR-filter*(8) , *SAPHanaSR-monitor*(8) ,
+*SAPHanaSR_maintenance_examples*(7) , *SAPHanaSR-manageAttr*(8) ,
+*crm_simulate*(8) , *crm_report*(8) , *cibadmin*(8) , *crm_mon*(8) ,
+*crm_attribute*(8) , *cs_convert_time*(8) , *cs_clusterstate*(8) ,
+*cs_show_hana_info*(8) , *cs_show_scores*(8)
+
+https://documentation.suse.com/sbp/sap/ ,
+
+https://documentation.suse.com/sles-sap/ ,
+
+https://www.susecon.com/doc/2015/sessions/TUT19921.pdf ,
+
+https://www.susecon.com/doc/2016/sessions/TUT90846.pdf ,
+
+https://www.susecon.com/archive-2020.html
+
+== AUTHORS
+
+A.Briel, F.Herschel, L.Pinne.
+
+== COPYRIGHT
+
+{empty}(c) 2014 SUSE Linux Products GmbH, Germany.
+
+(c) 2015-2017 SUSE Linux GmbH, Germany.
+
+(c) 2018-2023 SUSE LLC
+
+SAPHanaSR-showAttr comes with ABSOLUTELY NO WARRANTY.
+ +For details see the GNU General Public License at +http://www.gnu.org/licenses/gpl.html From cd853e903aa2dcfcd4c61480cac83c07cce03f64 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Wed, 23 Aug 2023 16:26:46 +0200 Subject: [PATCH 22/48] angi: infra: saphana-*-lib: Use runDir instead of /tmp for stdin,stderr redirection; reduced awk calls --- ra/saphana-common-lib | 35 +++++++++-------------------------- ra/saphana-controller-lib | 7 +++++++ ra/saphana-topology-lib | 7 +++++++ 3 files changed, 23 insertions(+), 26 deletions(-) diff --git a/ra/saphana-common-lib b/ra/saphana-common-lib index cc4b12ce..0e0525c0 100755 --- a/ra/saphana-common-lib +++ b/ra/saphana-common-lib @@ -420,9 +420,9 @@ function HANA_CALL() { * ) errExt=$(date '+%s%N')_${sid}adm # TODO PRIO1: NG - need to differ files for Topology and Controller - su_err_log="/run/HANA_CALL_SU_${raType}${errExt}" - cmd_out_log="/run/HANA_CALL_CMD_${raType}OUT_${errExt}" - cmd_err_log="/run/HANA_CALL_CMD_${raType}ERR_${errExt}" + su_err_log="${runDir}/HANA_CALL_SU_${raType}${errExt}" + cmd_out_log="${runDir}/HANA_CALL_CMD_${raType}OUT_${errExt}" + cmd_err_log="${runDir}/HANA_CALL_CMD_${raType}ERR_${errExt}" # TODO PRIO2: NG - ScaleUp had 'timeout "$timeOut" $pre_cmd "($pre_script; $cmd > $cmd_out_log)' # 'output=$(timeout "$timeOut" $pre_cmd "($pre_script; $cmd > $cmd_out_log) >& $cmd_err_log" 2>"$su_err_log"); rc=$? @@ -559,29 +559,21 @@ function get_local_sr_config() { gP ) # call getParameter (gP) local gpKeys="" gpKeys=$(echo --key=global.ini/system_replication/{actual_mode,mode,site_name,site_id}) - hdbANSWER_OLD=$(HANA_CALL --timeout "$HANA_CALL_TIMEOUT" --cmd "python getParameter.py $gpKeys --sapcontrol=1" 2>&1 | awk -F/ 'BEGIN {out=0} /^SAPCONTROL-OK: / { out=1 } /^SAPCONTROL-OK: / { out=0 } /=/ {if (out==1) {print $3} }') hdbANSWER=$(HANA_CALL --timeout "$HANA_CALL_TIMEOUT" --cmd "python getParameter.py $gpKeys --sapcontrol=1" 2>&1) [[ "$hdbANSWER" =~ "SAPCONTROL-OK: "(.*)"SAPCONTROL-OK: " ]] && hdbANSWER="${BASH_REMATCH[1]}" || hdbANSWER="" - # TODO: delete gSrModeOLD after checking, that gSrModeOLD == gSrMode (new method) - gSrModeOLD=$(echo "$hdbANSWER_OLD" | awk -F= '$1=="actual_mode" {print $2}') [[ "$hdbANSWER" =~ "/actual_mode"=([^$'\n']+) ]] && gSrMode=${BASH_REMATCH[1]} - # TODO: delete gSrModeOLD after checking, that gSrModeOLD == gSrMode (new method) - if [ -z "$gSrModeOLD" ]; then - gSrModeOLD=$(echo "$hdbANSWER_OLD" | awk -F= '$1=="mode" {print $2}') - fi # if 'actual_mode' is not available, fallback to 'mode' if [ -z "$gSrMode" ]; then [[ "$hdbANSWER" =~ "/mode="([^$'\n']+) ]] && gSrMode=${BASH_REMATCH[1]} fi - # TODO: delete gSrModeOLD after checking, that gSrModeOLD == gSrMode (new method) - super_ocf_log info "ACT: gSrModeOLD=$gSrModeOLD; gSrMode=$gSrMode" super_ocf_log info "ACT: hdbnsutil not answering - using global.ini as fallback - srmode=$gSrMode" ;; hU | * ) # call hdbnsUtil (hU) ( also for unknown chkMethod ) - # TODO PRIO 2: Debug-Log for result - # TODO PRIO 2: Check '--sacontrol=1 Output' for the begin and end mark to check valiity and completeness hdbANSWER=$(HANA_CALL --timeout "$HANA_CALL_TIMEOUT" --cmd "$hdbState --sapcontrol=1" 2>/dev/null) - gSrMode=$(echo "$hdbANSWER" | awk -F= '$1=="mode" {print $2}') + super_ocf_log info "ACT: HU: hdbANSWER="$hdbANSWER + #[[ "$hdbANSWER" =~ "SAPCONTROL-OK: "(.*)"SAPCONTROL-OK: " ]] && hdbANSWER="${BASH_REMATCH[1]}" || hdbANSWER="" + [[ "$hdbANSWER" =~ (^|$'\n')"mode="([^$'\n']+) ]] && gSrMode="${BASH_REMATCH[2]}" + super_ocf_log debug "ACT: HU: 
hdbANSWER=$hdbANSWER" ;; esac case "$gSrMode" in @@ -612,16 +604,7 @@ function get_local_sr_config() { esac done # TODO PRIO3: Implement a file lookup, if we did not get a result - gSite=$(echo "$hdbANSWER" | awk -F= '/site.name/ {print $2}') - # from scale-up: only set srmode, if it is still empty - # from scale-up: first search for 'actual_mode', then for 'mode' as key - if [ -z "$gSrMode" ]; then - gSrMode=$(echo "$hdbANSWER" | awk -F= '$1=="actual_mode" {print $2}') - # if 'actual_mode' is not available, fallback to 'mode' - if [ -z "$gSrMode" ]; then - gSrMode=$(echo "$hdbANSWER" | awk -F= '$1=="mode" {print $2}') - fi - fi + [[ "$hdbANSWER" =~ (^|$'\n'|/)site.name=([^$'\n']+) ]] && gSite="${BASH_REMATCH[2]}" || gSite="" } # end function get_local_sr_config # @@ -674,7 +657,7 @@ function check_for_primary() { local rc=0 case "$raType" in saphana* ) # SAPHanaController - get_local_sr_config # sets global variables gSrMode and gSite + get_local_sr_config >/dev/null # sets global variables gSrMode and gSite ;; sht* ) # SAPHanaTopology - did that already in sht_init() - maybe we can align that later ;; diff --git a/ra/saphana-controller-lib b/ra/saphana-controller-lib index 7d09ecc2..09d33809 100755 --- a/ra/saphana-controller-lib +++ b/ra/saphana-controller-lib @@ -413,6 +413,13 @@ function saphana_init() { NODENAME=$(crm_node -n) saphana_init_get_ocf_parameters # set SID, sid, sidadm, InstanceNr, InstanceName HANA_CALL_TIMEOUT, PreferSiteTakeover, AUTOMATED_REGISTER, RemoveSAPSockets # + # create directory for HANA_CALL command sdtout and stderr tracking + # + runDir="/run/SAPHanaSR_${SID}" + mkdir -p "$runDir" + chown "${SID,,}adm" "$runDir" + super_ocf_log info "DEC: preparing runDir ($runDir) for access of user ${SID,,}adm" + # # get sap virtual host name # get_local_virtual_name; SAPVIRHOST=${gVirtName} diff --git a/ra/saphana-topology-lib b/ra/saphana-topology-lib index 12fe0e4f..3cd78188 100755 --- a/ra/saphana-topology-lib +++ b/ra/saphana-topology-lib @@ -201,6 +201,13 @@ function sht_init() { HANA_CALL_TIMEOUT="${OCF_RESKEY_HANA_CALL_TIMEOUT:-120}" sid="${SID,,}" export sidadm="${sid}adm" # TODO PRIO3: NG - check if we use that var in a Topology/Controller common function in future + # + # create directory for HANA_CALL command sdtout and stderr tracking + # + runDir="/run/SAPHanaSR_${SID}" + mkdir -p "$runDir" + chown "${SID,,}adm" "$runDir" + super_ocf_log info "DEC: preparing runDir ($runDir) for access of user ${SID,,}adm" #ocf_env=$(env | grep 'OCF_RESKEY_CRM') #super_ocf_log debug "DBG: OCF: $ocf_env" # From 85e6f3d4b7f275b902f2c300ccf38a17a6ea32d3 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Thu, 24 Aug 2023 12:48:53 +0200 Subject: [PATCH 23/48] angi: infra: saphana-filesystem-lib - try to react on parameter ON_FAIL_ACTION (fail or ignore) --- ra/saphana-filesystem-lib | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/ra/saphana-filesystem-lib b/ra/saphana-filesystem-lib index 0c630ef8..150ae462 100755 --- a/ra/saphana-filesystem-lib +++ b/ra/saphana-filesystem-lib @@ -17,6 +17,7 @@ # OCF_RESKEY_SID (LNX, SUS, SLE) # OCF_RESKEY_InstanceNumber (00..99) # OCF_RESKEY_DIRECTORY (path to be monitored, default /hana/shared/) +# OCF_RESKEY_ON_FAIL_ACTION (optional: fail, ignore; default is fail) # ####################################################################### # @@ -162,7 +163,7 @@ function shfs_methods() { # function: shfs_init - initialize variables for the resource agent # params: - # globals: OCF_*(r), 
SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), -# globals: ATTR_NAME_HANA_* +# globals: ATTR_NAME_HANA_* # globals: nodelist(w) # globals: NODENAME(w), hdbver(w) # shfs_init : Define global variables with default values, if optional parameters are not set @@ -181,13 +182,15 @@ function shfs_init() { SID="$OCF_RESKEY_SID" InstanceNr="$OCF_RESKEY_InstanceNumber" InstanceName="HDB${InstanceNr}" + runDir="/run/SAPHanaSR_${SID}" fs_hana_shared="${OCF_RESKEY_DIRECTORY:-/hana/shared/$SID}" HANA_CALL_TIMEOUT="${OCF_RESKEY_HANA_CALL_TIMEOUT:-90}" + ON_FAIL_ACTION="${OCF_RESKEY_ON_FAIL_ACTION:-fail}" td_hana_shared="$fs_hana_shared/.suse_SAPHanaFilesystem/${NODENAME}" pp_hana_shared="/dev/shm/poison_pill_${SID}" sid="${SID,,}" export sidadm="${sid}adm" - RemoveSAPSockets="" # not needed in SAPHanaFilesystem, but referened in shell lib saphana-controller-common-lib used by SAPHanaFilesystem + RemoveSAPSockets="" # not needed in SAPHanaFilesystem, but referened in shell lib saphana-controller-common-lib used by SAPHanaFilesystem # # init attribute definitions # @@ -222,7 +225,7 @@ function shfs_start() { } # end function shfs_start # -# function: shfs_stop - stop a hana pseudo FS resource +# function: shfs_stop - stop a hana pseudo FS resource # params: - # globals: OCF_*(r), SAPCONTROL(r), SID(r), InstanceName(r) # shfs_stop: Stop the SAP HANA pseudo FS resource instance @@ -257,7 +260,7 @@ function shfs_error_simulation() { return 0 else return 1 - fi + fi } function shfs_test_directory() @@ -268,7 +271,7 @@ function shfs_test_directory() dd_opts="oflag=direct,sync bs=4k count=1 conv=fsync,sync" STATUSFILE="${td_hana_shared}/test" # shellcheck disable=SC2086 # - dd_opts must not be quoted - dd if=/dev/zero of="${STATUSFILE}" $dd_opts + dd if=/dev/zero of="${STATUSFILE}" $dd_opts ); rc2="$?" 
ls "${td_hana_shared}" (( error_code = 1000 * rc2 + rc1 )) @@ -306,7 +309,7 @@ function shfs_monitor() { else # # only for PoC tests allow an error simulation - # + # if shfs_error_simulation; then test_rc=42 fi @@ -337,11 +340,19 @@ function shfs_monitor() { ;; *_SOK ) rc="$OCF_ERR_GENERIC" super_ocf_log info "RA monitor() ${test_rc}_${test_rem_sr} Go out of here" - touch "$pp_hana_shared" - ;; + case "$ON_FAIL_ACTION" in + ignore ) + super_ocf_log info "RA monitor() ON_FAIL_ACTION=$ON_FAIL_ACTION => ignore FS error, do not create poison pill file" + ;; + * ) + super_ocf_log info "RA monitor() ON_FAIL_ACTION=$ON_FAIL_ACTION => do not ignore FS error, create poison pill file" + touch "$pp_hana_shared" + ;; + esac + ;; *_* ) rc="$OCF_ERR_GENERIC" super_ocf_log info "RA monitor() ${test_rc}_${test_rem_sr} Thunderstorm" - ;; + ;; esac fi super_ocf_log info "FLOW ${FUNCNAME[0]} rc=$rc" @@ -401,7 +412,7 @@ function shfs_validate() { # globals: OCF_*(r), # shfs_start_clone # -function shfs_start_clone() { +function shfs_start_clone() { # called by: TODO super_ocf_log info "FLOW ${FUNCNAME[0]} ($*)" local rc="$OCF_NOT_RUNNING" From 62268a9dcd4c6ccd765ade5f6c970257cd79d209 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Thu, 24 Aug 2023 12:50:07 +0200 Subject: [PATCH 24/48] angi: package: saphana-common-lib - for shellcheck --- ra/saphana-common-lib | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ra/saphana-common-lib b/ra/saphana-common-lib index 0e0525c0..a17f53a2 100755 --- a/ra/saphana-common-lib +++ b/ra/saphana-common-lib @@ -570,8 +570,7 @@ function get_local_sr_config() { ;; hU | * ) # call hdbnsUtil (hU) ( also for unknown chkMethod ) hdbANSWER=$(HANA_CALL --timeout "$HANA_CALL_TIMEOUT" --cmd "$hdbState --sapcontrol=1" 2>/dev/null) - super_ocf_log info "ACT: HU: hdbANSWER="$hdbANSWER - #[[ "$hdbANSWER" =~ "SAPCONTROL-OK: "(.*)"SAPCONTROL-OK: " ]] && hdbANSWER="${BASH_REMATCH[1]}" || hdbANSWER="" + super_ocf_log info "ACT: HU: hdbANSWER=$hdbANSWER" [[ "$hdbANSWER" =~ (^|$'\n')"mode="([^$'\n']+) ]] && gSrMode="${BASH_REMATCH[2]}" super_ocf_log debug "ACT: HU: hdbANSWER=$hdbANSWER" ;; From 3c1a01c472272c2d8b6eae92c70e3173897f5d0b Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Thu, 24 Aug 2023 12:51:14 +0200 Subject: [PATCH 25/48] angi: package: saphana-controller-lib - reduced piping commands --- ra/saphana-controller-lib | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ra/saphana-controller-lib b/ra/saphana-controller-lib index 09d33809..e84941a1 100755 --- a/ra/saphana-controller-lib +++ b/ra/saphana-controller-lib @@ -1591,10 +1591,10 @@ function saphana_monitor_running_secondary() { fi lpa_set_lpt 30 "$gSite" sync_attr=$(get_SRHOOK "$gSite") - # TODO: PRIO 3: check, if using getParameter.py is the best option to analyze the operationMode + # DONE: PRIO 3: check, if using getParameter.py is the best option to analyze the operationMode hanaOut1=$(HANA_CALL --timeout 10 --use-su --cmd "python getParameter.py --key=global.ini/system_replication/operation_mode --sapcontrol=1") - hanaFilter1=$(echo "$hanaOut1" | awk -F/ 'BEGIN {out=0} /^SAPCONTROL-OK: / { out=1 } /^SAPCONTROL-OK: / { out=0 } /=/ {if (out==1) {print $3} }') - hanaOM=$(echo "$hanaFilter1" | awk -F= '$1=="operation_mode" {print $2}') + [[ "$hanaOut1" =~ "SAPCONTROL-OK: "(.*)"SAPCONTROL-OK: " ]] && hanaFilter1="${BASH_REMATCH[1]}" || hanaFilter1="" + [[ "$hanaFilter1" =~ "/operation_mode="([^$'\n']+) ]] && hanaOM="${BASH_REMATCH[1]}" || hanaOM="" set_hana_site_attribute 
"${gSite}" "$hanaOM" "${ATTR_NAME_HANA_SITE_OPERATION_MODE[@]}" if [[ -n "$remoteNode" && -n "$remSite" ]]; then set_hana_site_attribute "${remSite}" "$hanaOM" "${ATTR_NAME_HANA_SITE_OPERATION_MODE[@]}" # also set attribute for remote site From 8a4ea61ad4d37c0e3a9dd242ff5bcb33ec511fda Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Thu, 24 Aug 2023 12:52:18 +0200 Subject: [PATCH 26/48] angi: package: *spec - changing to version 1.2.1 --- SAPHanaSR-angi.spec | 2 +- SAPHanaSR-tester.spec | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/SAPHanaSR-angi.spec b/SAPHanaSR-angi.spec index ef54c37f..09e92d51 100644 --- a/SAPHanaSR-angi.spec +++ b/SAPHanaSR-angi.spec @@ -21,7 +21,7 @@ License: GPL-2.0 Group: Productivity/Clustering/HA AutoReqProv: on Summary: Resource agents to control the HANA database in system replication setup -Version: 1.2.0 +Version: 1.2.1 Release: 0 Url: https://www.suse.com/c/fail-safe-operation-of-sap-hana-suse-extends-its-high-availability-solution/ diff --git a/SAPHanaSR-tester.spec b/SAPHanaSR-tester.spec index abb3d3c1..5d6036cc 100644 --- a/SAPHanaSR-tester.spec +++ b/SAPHanaSR-tester.spec @@ -19,7 +19,7 @@ License: GPL-2.0 Group: Productivity/Clustering/HA AutoReqProv: on Summary: Test suite for SAPHanaSR clusters -Version: 1.2.0 +Version: 1.2.1 Release: 0 Url: https://www.suse.com/c/fail-safe-operation-of-sap-hana-suse-extends-its-high-availability-solution/ From 611283f9d5594100d32ff1479a5a0d437ce0a8dc Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Thu, 24 Aug 2023 12:53:26 +0200 Subject: [PATCH 27/48] angi: future: srHook/susHanaSrMultiTarget.py.to-be-integrated - added for later integration --- .../susHanaSrMultiTarget.py.to-be-integrated | 262 ++++++++++++++++++ 1 file changed, 262 insertions(+) create mode 100644 srHook/susHanaSrMultiTarget.py.to-be-integrated diff --git a/srHook/susHanaSrMultiTarget.py.to-be-integrated b/srHook/susHanaSrMultiTarget.py.to-be-integrated new file mode 100644 index 00000000..90590453 --- /dev/null +++ b/srHook/susHanaSrMultiTarget.py.to-be-integrated @@ -0,0 +1,262 @@ +""" +# SAPHana +# Author: Fabian Herschel, 2015 +# License: GNU General Public License (GPL) +# Copyright: (c) 2015-2016 SUSE Linux GmbH +# Copyright: (c) 2017-2021 SUSE LLC + +SAPHanaSrMultiTarget needs SAP HANA 2.0 SPS4 (2.00.040.00) as minimum version +""" +import os, random +from datetime import datetime + +try: + from hdb_ha_dr.client import HADRBase +except ImportError as e: + print("Module HADRBase not found - running outside of SAP HANA? 
- {0}".format(e)) + +""" +Only for SAP HANA >= 2.0 SPS3 + +To use this HA/DR hook provide please add the following lines (or similar) to your global.ini: + [ha_dr_provider_SAPHanaSrMultiTarget] + provider = SAPHanaSrMultiTarget + path = /usr/share/SAPHanaSR-ScaleOut + cib_access = all-on + execution_order = 1 + + [trace] + ha_dr_saphanasr = info +""" +fhSRHookVersion = "0.181.0.0216.1706" +srHookGen = "2.2" +cib_access_dflt = "all-on" +startTime = 0 +currentTime = 0 +stopTime = 0 + +def getEpisode(): + episode = "{0}-{1}".format( datetime.now().strftime('%s') , random.randrange(10000,20000)) + return episode + +def logTimestamp(episode, outputMessage): + traceFilepath = os.path.join(os.environ['SAP_RETRIEVAL_PATH'], 'trace', 'nameserver_saphanasr_multitarget_hook.trc') + try: + with open(traceFilepath, "a") as saphanasr_multitarget_file: + currentTimeStr = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f ') + outputMessage = "{0} [{2}] {1}".format(currentTimeStr,outputMessage, episode) + saphanasr_multitarget_file.write(outputMessage + "\n") + saphanasr_multitarget_file.flush() + + except ( RuntimeError, TypeError, NameError, OSError ) as e : + self.tracer.info("{0}.{1}() logTimestamp error {2}".format(self.__class__.__name__, method, e)) + print("Error in logTimestamp(): {0}".format(e)) + +try: + class SAPHanaSrMultiTarget(HADRBase): + + + def __init__(self, *args, **kwargs): + episode = getEpisode() + logTimestamp(episode, "init called") + # delegate construction to base class + super(SAPHanaSrMultiTarget, self).__init__(*args, **kwargs) + method = "init" + if self.config.hasKey("cib_access"): + self.cib_access = self.config.get("cib_access") + # first step, should be removed later + if self.cib_access != "site-on": + self.cib_access = "all-on" + else: + self.cib_access = cib_access_dflt + self.tracer.info("{0}.{1}() version {2}, hookGeneration {3}, cib_access {4}".format(self.__class__.__name__, method, fhSRHookVersion, srHookGen, self.cib_access)) + mySID = os.environ.get('SAPSYSTEMNAME') + mysid = mySID.lower() + myCMD = "sudo /usr/sbin/crm_attribute -n hana_{1}_gsh -v {0} -l reboot".format(srHookGen, mysid) + logTimestamp(episode, "pre call " + myCMD) + rc = os.system(myCMD) + logTimestamp(episode, "post call " + myCMD) + myMSG = "CALLING CRM: <{0}> rc={1}".format(myCMD, rc) + self.tracer.info("{0}.{1}() {2}\n".format(self.__class__.__name__, method, myMSG)) + self.tracer.info("{0}.{1}() Running srHookGeneration {2}, see attribute hana_{3}_gsh too\n".format(self.__class__.__name__, method, srHookGen, mysid)) + + # check if multi-target support attribute exists + mts = "true" + myCMD = "sudo /usr/sbin/crm_attribute -n hana_%s_glob_mts -G" % (mysid) + logTimestamp(episode, "pre call " + myCMD) + rc = os.system(myCMD) + logTimestamp(episode, "post call " + myCMD) + if rc != 0: + # multi-target support attribute not found, create it + myCMD = "sudo /usr/sbin/crm_attribute -n hana_{0}_glob_mts -v {1} -t crm_config -s SAPHanaSR".format(mysid, mts) + logTimestamp(episode, "pre call " + myCMD) + rc = os.system(myCMD) + logTimestamp(episode, "post call " + myCMD) + myMSG = "CALLING CRM: <{0}> rc={1}".format(myCMD, rc) + self.tracer.info("{0}.{1}() {2}\n".format(self.__class__.__name__, method, myMSG)) + logTimestamp(episode, "init exit") + + + def about(self): + method = "about" + self.tracer.info("{0}.{1}() version {2}".format(self.__class__.__name__, method, fhSRHookVersion)) + return {"provider_company": "SUSE", + "provider_name": "SAPHanaSrMultiTarget", # class name + "provider_description": 
"Inform Cluster about SR state", + "provider_version": "1.0"} + + def startup(self, hostname, storage_partition, sr_mode, **kwargs): + method = "startup" + self.tracer.debug("enter startup hook; {0}".format(locals())) + self.tracer.debug(self.config.toString()) + self.tracer.info("leave startup hook") + return 0 + + def shutdown(self, hostname, storage_partition, sr_mode, **kwargs): + method = "shutdown" + self.tracer.debug("enter shutdown hook; {0}".format(locals())) + self.tracer.debug(self.config.toString()) + self.tracer.info("leave shutdown hook") + return 0 + + def failover(self, hostname, storage_partition, sr_mode, **kwargs): + method = "failover" + self.tracer.debug("enter failover hook; {0}".format(locals())) + self.tracer.debug(self.config.toString()) + self.tracer.info("leave failover hook") + return 0 + + def stonith(self, failingHost, **kwargs): + method = "stonith" + self.tracer.debug("enter stonith hook; {0}".format(locals())) + self.tracer.debug(self.config.toString()) + # e.g. stonith of params["failed_host"] + # e-g- set vIP active + self.tracer.info("leave stonith hook") + return 0 + + def preTakeover(self, isForce, **kwargs): + """Pre takeover hook.""" + method = "preTakeover" + self.tracer.info("{0}.{1}() method called with isForce={2}".format(self.__class__.__name__, method, isForce)) + if not isForce: + # run pre takeover code + # run pre-check, return != 0 in case of error => will abort takeover + return 0 + else: + # possible force-takeover only code + # usually nothing to do here + return 0 + + def postTakeover(self, rc, **kwargs): + method = "postTakeover" + """Post takeover hook.""" + self.tracer.info("{0}.{1}() method called with rc={2}".format(self.__class__.__name__, method, rc)) + if rc == 0: + # normal takeover succeeded + return 0 + elif rc == 1: + # waiting for force takeover + return 0 + elif rc == 2: + # error, something went wrong + return 0 + + def srConnectionChanged(self, ParamDict, **kwargs): + method = "srConnectionChanged" + startTime = datetime.now() + episode = getEpisode() + logTimestamp(episode, "srConnectionChanged called") + """ finally we got the srConnection hook :) """ + self.tracer.info("{0}.{1}() method called with Dict={2} (version {3}) and cib_access {4}".format(self.__class__.__name__, method, ParamDict, fhSRHookVersion, self.cib_access)) + logTimestamp(episode, "send dict message to log") + # myHostname = socket.gethostname() + # myDatebase = ParamDict["database"] + mySystemStatus = ParamDict["system_status"] + mySID = os.environ.get('SAPSYSTEMNAME') + mysid = mySID.lower() + myInSync = ParamDict["is_in_sync"] + myReason = ParamDict["reason"] + mySite = ParamDict["siteName"] + # if self.cib_access != "all-off" and self.cib_access != "glob-off": + if self.cib_access == "all-on" or self.cib_access == "glob-on" or self.cib_access == "site-off": + myCMD = "sudo /usr/sbin/crm_attribute -n hana_{1}_gsh -v {0} -l reboot".format(srHookGen, mysid) + logTimestamp(episode, "pre call " + myCMD) + rc = os.system(myCMD) + logTimestamp(episode, "post call " + myCMD) + myMSG = "CALLING CRM: <{0}> rc={1}".format(myCMD, rc) + self.tracer.info("{0}.{1}() {2}\n".format(self.__class__.__name__, method, myMSG)) + logTimestamp(episode, "send result to log") + self.tracer.info("{0}.{1}() Running srHookGeneration {2}, see attribute hana_{3}_gsh too\n".format(self.__class__.__name__, method, srHookGen, mysid)) + logTimestamp(episode, "differ cases following dictionary entries") + if mySystemStatus == 15: + mySRS = "SOK" + else: + if myInSync: + # ignoring 
the SFAIL, because we are still in sync + self.tracer.info("{0}.{1}() ignoring bad SR status because of is_in_sync=True (reason={2})".format(self.__class__.__name__, method, myReason)) + mySRS = "" + else: + mySRS = "SFAIL" + if mySRS == "": + myMSG = "### Ignoring bad SR status because of is_in_sync=True ###" + self.tracer.info("{0}.{1}() {2}\n".format(self.__class__.__name__, method, myMSG)) + elif mySite == "": + myMSG = "### Ignoring bad SR status because of empty site name in call params ###" + self.tracer.info("{0}.{1}() was called with empty site name. Ignoring call.".format(self.__class__.__name__, method)) + else: + # if self.cib_access != "all-off" and self.cib_access != "glob-off": + if self.cib_access == "all-on" or self.cib_access == "glob-on" or self.cib_access == "site-off": + # check if global Hook attribute exists + myCMD = "sudo /usr/sbin/crm_attribute -n hana_%s_glob_srHook -G" % (mysid) + rc = os.system(myCMD) + if rc == 0: + # found global Hook attribute, write both (old and new) attributes + # for compatibility reasons + myCMD = "sudo /usr/sbin/crm_attribute -n hana_{0}_glob_srHook -v {1} -t crm_config -s SAPHanaSR".format(mysid, mySRS) + logTimestamp(episode, "pre call " + myCMD) + rc = os.system(myCMD) + logTimestamp(episode, "post call " + myCMD) + myMSG = "CALLING CRM: <{0}> rc={1}".format(myCMD, rc) + self.tracer.info("{0}.{1}() {2}\n".format(self.__class__.__name__, method, myMSG)) + logTimestamp(episode, "send result to log") + + # if self.cib_access != "all-off" and self.cib_access != "site-off": + # if self.cib_access == "all-on" or self.cib_access == "site-on" or self.cib_access == "glob-off": + if self.cib_access == "all-on" or self.cib_access == "site-on": + myCMD = "sudo /usr/sbin/crm_attribute -n hana_{0}_site_srHook_{1} -v {2} -t crm_config -s SAPHanaSR".format(mysid, mySite, mySRS) + logTimestamp(episode, "pre call " + myCMD) + rc = os.system(myCMD) + logTimestamp(episode, "post call " + myCMD) + myMSG = "CALLING CRM: <{0}> rc={1}".format(myCMD, rc) + self.tracer.info("{0}.{1}() {2}\n".format(self.__class__.__name__, method, myMSG)) + logTimestamp(episode, "send result to log") + # + if rc != 0: + logTimestamp(episode, "update cluster attribute failed, enter fallback") + # + # FALLBACK + # sending attribute to the cluster failed - using fallback method and write status to a file - RA to pick-up the value during next SAPHanaController monitor operation + # + myMSG = "sending attribute to the cluster failed - using local file as fallback" + self.tracer.info("{0}.{1}() {2}\n".format(self.__class__.__name__, method, myMSG)) + # + # cwd of hana is /hana/shared//HDB00/ we use a relative path to cwd this gives us a adm permitted directory + # however we go one level up (..) to have the file accessible for all SAP HANA swarm nodes + # + logTimestamp(episode, "prepare fallback attribute file (stage)") + fallbackFileObject = open("../.crm_attribute.stage.{0}".format(mySite), "w") + fallbackFileObject.write("hana_{0}_site_srHook_{1} = {2}".format(mysid, mySite, mySRS)) + fallbackFileObject.close() + logTimestamp(episode, "created fallback attribute file (stage)") + # + # release the stage file to the original name (move is used to be atomic) + # .crm_attribute.stage. is renamed to .crm_attribute. 
+ # + logTimestamp(episode, "move fallback attribute file stage to live") + os.rename("../.crm_attribute.stage.{0}".format(mySite), "../.crm_attribute.{0}".format(mySite)) + logTimestamp(episode, "ready to move fallback attribute file stage to live") + logTimestamp(episode, "srConnectionChanged exit") + return 0 +except NameError as e: + print("Could not find base class ({0})".format(e)) From f2becc2fa599dc33ee8866dcf06d89dcd192aa28 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Fri, 25 Aug 2023 11:39:44 +0200 Subject: [PATCH 28/48] angi: performance: saphana-controller-lib - next step to reduce forking and to use bash interal functions --- ra/saphana-controller-lib | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ra/saphana-controller-lib b/ra/saphana-controller-lib index e84941a1..33e127ab 100755 --- a/ra/saphana-controller-lib +++ b/ra/saphana-controller-lib @@ -1839,7 +1839,8 @@ function saphana_monitor_clone() { # check during probe, if this instance is *NOT* running as master # setting clone_state to "DEMOTED" is needed to avoid misleading "PROMOTED"/"PROMOTED" # - master_node_name=$(crm_resource -W -r "$OCF_RESOURCE_INSTANCE" | awk '$7 == "Master" { print $6 }') + crm_res=$(crm_resource -W -r "$OCF_RESOURCE_INSTANCE") + [[ "$crm_res" =~ "is running on: "(.+)" Master" ]] && master_node_name="${BASH_REMATCH[1]}" || master_node_name="" if [ "$master_node_name" != "$NODENAME" ]; then if [ "$gLss" -ge 2 ]; then # we are in a probe, hana is running and we need to reset the clone_state From 4a259c5fe2cab42101501e39020167bde0385e3f Mon Sep 17 00:00:00 2001 From: lpinne Date: Fri, 25 Aug 2023 13:13:12 +0200 Subject: [PATCH 29/48] angi-ScaleUp/demo_kill_prim_inst.json angi-ScaleUp/kill_prim_indexserver.json angi-ScaleUp/kill_prim_inst.json angi-ScaleUp/kill_secn_indexserver.json angi-ScaleUp/kill_secn_inst.json classic-ScaleUp/kill_prim_indexserver.json classic-ScaleUp/kill_prim_inst.jsonclassic-ScaleUp/kill_prim_inst.json classic-ScaleUp/kill_secn_indexserver.json classic-ScaleUp/kill_secn_inst.json: changed lss=[12] to lss=(1|2) --- test/json/angi-ScaleUp/demo_kill_prim_inst.json | 2 +- test/json/angi-ScaleUp/kill_prim_indexserver.json | 2 +- test/json/angi-ScaleUp/kill_prim_inst.json | 2 +- test/json/angi-ScaleUp/kill_secn_indexserver.json | 2 +- test/json/angi-ScaleUp/kill_secn_inst.json | 2 +- test/json/classic-ScaleUp/kill_prim_indexserver.json | 2 +- test/json/classic-ScaleUp/kill_prim_inst.json | 2 +- test/json/classic-ScaleUp/kill_secn_indexserver.json | 2 +- test/json/classic-ScaleUp/kill_secn_inst.json | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/test/json/angi-ScaleUp/demo_kill_prim_inst.json b/test/json/angi-ScaleUp/demo_kill_prim_inst.json index e92474d8..548d9476 100644 --- a/test/json/angi-ScaleUp/demo_kill_prim_inst.json +++ b/test/json/angi-ScaleUp/demo_kill_prim_inst.json @@ -24,7 +24,7 @@ "loop": 120, "wait": 2, "pSite": [ - "lss=[12]" , + "lss=(1|2)" , "srr=P" , "lpt=(1[6-9]........|20)" , "srHook=(PRIM|SWAIT|SREG)" , diff --git a/test/json/angi-ScaleUp/kill_prim_indexserver.json b/test/json/angi-ScaleUp/kill_prim_indexserver.json index 4f71453c..ffdb46d9 100644 --- a/test/json/angi-ScaleUp/kill_prim_indexserver.json +++ b/test/json/angi-ScaleUp/kill_prim_indexserver.json @@ -22,7 +22,7 @@ "loop": 120, "wait": 2, "pSite": [ - "lss=[12]" , + "lss=(1|2)" , "srr=P" , "lpt=(1[6-9]........|20)" , "srHook=(PRIM|SWAIT|SREG)" , diff --git a/test/json/angi-ScaleUp/kill_prim_inst.json b/test/json/angi-ScaleUp/kill_prim_inst.json 
index a4300933..fe2288eb 100644 --- a/test/json/angi-ScaleUp/kill_prim_inst.json +++ b/test/json/angi-ScaleUp/kill_prim_inst.json @@ -24,7 +24,7 @@ "loop": 120, "wait": 2, "pSite": [ - "lss=[12]" , + "lss=(1|2)" , "srr=P" , "lpt=(1[6-9]........|20)" , "srHook=(PRIM|SWAIT|SREG)" , diff --git a/test/json/angi-ScaleUp/kill_secn_indexserver.json b/test/json/angi-ScaleUp/kill_secn_indexserver.json index e6634915..213e5736 100644 --- a/test/json/angi-ScaleUp/kill_secn_indexserver.json +++ b/test/json/angi-ScaleUp/kill_secn_indexserver.json @@ -30,7 +30,7 @@ ], "sSite": [ "lpt=(10|30)", - "lss=[12]", + "lss=(1|2)", "srr=S", "srHook=SFAIL", "srPoll=(SFAIL|SOK)" diff --git a/test/json/angi-ScaleUp/kill_secn_inst.json b/test/json/angi-ScaleUp/kill_secn_inst.json index 0f929967..95f2de32 100644 --- a/test/json/angi-ScaleUp/kill_secn_inst.json +++ b/test/json/angi-ScaleUp/kill_secn_inst.json @@ -30,7 +30,7 @@ ], "sSite": [ "lpt=(10|30)", - "lss=[12]", + "lss=(1|2)", "srr=S", "srHook=SFAIL", "srPoll=(SFAIL|SOK)" diff --git a/test/json/classic-ScaleUp/kill_prim_indexserver.json b/test/json/classic-ScaleUp/kill_prim_indexserver.json index 67ed095e..3be3fedd 100644 --- a/test/json/classic-ScaleUp/kill_prim_indexserver.json +++ b/test/json/classic-ScaleUp/kill_prim_indexserver.json @@ -22,7 +22,7 @@ "loop": 120, "wait": 2, "pSite": [ - "lss=[12]" , + "lss=(1|2)" , "srr=P" , "lpt=(1[6-9]........|20)" , "srHook=(PRIM|SWAIT|SREG)" , diff --git a/test/json/classic-ScaleUp/kill_prim_inst.json b/test/json/classic-ScaleUp/kill_prim_inst.json index 05958b6b..1674e73e 100644 --- a/test/json/classic-ScaleUp/kill_prim_inst.json +++ b/test/json/classic-ScaleUp/kill_prim_inst.json @@ -24,7 +24,7 @@ "loop": 120, "wait": 2, "pSite": [ - "lss=[12]" , + "lss=(1|2)" , "srr=P" , "lpt=(1[6-9]........|20)" , "srHook=(PRIM|SWAIT|SREG)" , diff --git a/test/json/classic-ScaleUp/kill_secn_indexserver.json b/test/json/classic-ScaleUp/kill_secn_indexserver.json index d82615fe..409b37d3 100644 --- a/test/json/classic-ScaleUp/kill_secn_indexserver.json +++ b/test/json/classic-ScaleUp/kill_secn_indexserver.json @@ -30,7 +30,7 @@ ], "sSite": [ "lpt=(10|30)", - "lss=[12]", + "lss=(1|2)", "srr=S", "srHook=SFAIL", "srPoll=(SFAIL|SOK)" diff --git a/test/json/classic-ScaleUp/kill_secn_inst.json b/test/json/classic-ScaleUp/kill_secn_inst.json index ef4af67a..ee928ef7 100644 --- a/test/json/classic-ScaleUp/kill_secn_inst.json +++ b/test/json/classic-ScaleUp/kill_secn_inst.json @@ -30,7 +30,7 @@ ], "sSite": [ "lpt=(10|30)", - "lss=[12]", + "lss=(1|2)", "srr=S", "srHook=SFAIL", "srPoll=(SFAIL|SOK)" From c567c902914bbe48a7321031c897ad4ee396ac06 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Mon, 28 Aug 2023 10:06:15 +0200 Subject: [PATCH 30/48] angi: support: saphana-common-lib - for better supportability add RA process ID to the HANA_CALL; the PID could be found in the SAP HANA trace files --- ra/saphana-common-lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ra/saphana-common-lib b/ra/saphana-common-lib index a17f53a2..c00559c3 100755 --- a/ra/saphana-common-lib +++ b/ra/saphana-common-lib @@ -395,7 +395,7 @@ function HANA_CALL() { if [ "$use_su" == "1" ]; then pre_cmd_type="su" - [[ "$cmd" == python* ]] && pre_script="cd $DIR_EXECUTABLE/python_support" || pre_script='true' + [[ "$cmd" == python* ]] && pre_script=": [$$]; cd $DIR_EXECUTABLE/python_support" || pre_script=": [$$]" else # as root user we need the library path to the SAP kernel to be able to call sapcontrol # check, if we already added DIR_EXECUTABLE at 
the beginning of LD_LIBRARY_PATH From b58ff1d424ce976e8b8d819e0ba2985c25b01648 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Thu, 31 Aug 2023 16:02:27 +0200 Subject: [PATCH 31/48] classic: tester: begin with classic-ScaleOut (with and without SAP HANA HA) --- .../angi-ScaleOut/bmt.json | 42 ++++++ .../defaultChecks+newComparators.json | 66 +++++++++ .../angi-ScaleOut/defaultChecks.json | 57 ++++++++ .../angi-ScaleOut/free_log_area.json | 42 ++++++ .../angi-ScaleOut/kill_prim_indexserver.json | 97 +++++++++++++ .../angi-ScaleOut/kill_prim_inst.json | 99 +++++++++++++ .../angi-ScaleOut/kill_prim_node.json | 91 ++++++++++++ .../kill_prim_worker_indexserver.json | 95 ++++++++++++ .../angi-ScaleOut/kill_prim_worker_inst.json | 97 +++++++++++++ .../angi-ScaleOut/kill_prim_worker_node.json | 93 ++++++++++++ .../angi-ScaleOut/kill_secn_indexserver.json | 95 ++++++++++++ .../angi-ScaleOut/kill_secn_inst.json | 94 ++++++++++++ .../angi-ScaleOut/kill_secn_node.json | 89 ++++++++++++ .../angi-ScaleOut/kill_secn_worker_inst.json | 74 ++++++++++ .../angi-ScaleOut/kill_secn_worker_node.json | 72 ++++++++++ .../maintenance_cluster_turn_hana.json | 32 +++++ .../angi-ScaleOut/nop-false.json | 33 +++++ .../angi-ScaleOut/nop.json | 31 ++++ .../angi-ScaleOut/properties.json | 6 + .../angi-ScaleOut/properties_ha1_hdb10.json | 5 + .../angi-ScaleOut/restart_cluster.json | 31 ++++ .../restart_cluster_hana_running.json | 31 ++++ .../restart_cluster_turn_hana.json | 32 +++++ .../angi-ScaleOut/sap.json | 136 ++++++++++++++++++ .../angi-ScaleOut/spn.json | 97 +++++++++++++ .../angi-ScaleOut/ssn.json | 98 +++++++++++++ .../classic-ScaleOut/angi-ScaleOut/bmt.json | 42 ++++++ .../defaultChecks+newComparators.json | 66 +++++++++ .../angi-ScaleOut/defaultChecks.json | 57 ++++++++ .../angi-ScaleOut/free_log_area.json | 42 ++++++ .../angi-ScaleOut/kill_prim_indexserver.json | 97 +++++++++++++ .../angi-ScaleOut/kill_prim_inst.json | 99 +++++++++++++ .../angi-ScaleOut/kill_prim_node.json | 91 ++++++++++++ .../kill_prim_worker_indexserver.json | 95 ++++++++++++ .../angi-ScaleOut/kill_prim_worker_inst.json | 97 +++++++++++++ .../angi-ScaleOut/kill_prim_worker_node.json | 93 ++++++++++++ .../angi-ScaleOut/kill_secn_indexserver.json | 95 ++++++++++++ .../angi-ScaleOut/kill_secn_inst.json | 94 ++++++++++++ .../angi-ScaleOut/kill_secn_node.json | 89 ++++++++++++ .../angi-ScaleOut/kill_secn_worker_inst.json | 74 ++++++++++ .../angi-ScaleOut/kill_secn_worker_node.json | 72 ++++++++++ .../maintenance_cluster_turn_hana.json | 32 +++++ .../angi-ScaleOut/nop-false.json | 33 +++++ .../classic-ScaleOut/angi-ScaleOut/nop.json | 31 ++++ .../angi-ScaleOut/properties.json | 6 + .../angi-ScaleOut/properties_ha1_hdb10.json | 5 + .../angi-ScaleOut/restart_cluster.json | 31 ++++ .../restart_cluster_hana_running.json | 31 ++++ .../restart_cluster_turn_hana.json | 32 +++++ .../classic-ScaleOut/angi-ScaleOut/sap.json | 136 ++++++++++++++++++ .../classic-ScaleOut/angi-ScaleOut/spn.json | 97 +++++++++++++ .../classic-ScaleOut/angi-ScaleOut/ssn.json | 98 +++++++++++++ 52 files changed, 3470 insertions(+) create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/bmt.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/defaultChecks+newComparators.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/defaultChecks.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/free_log_area.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_indexserver.json create mode 100644 
test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_inst.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_node.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_worker_indexserver.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_worker_inst.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_worker_node.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_indexserver.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_inst.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_node.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_worker_inst.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_worker_node.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/maintenance_cluster_turn_hana.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/nop-false.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/nop.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/properties.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/properties_ha1_hdb10.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/restart_cluster.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/restart_cluster_hana_running.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/restart_cluster_turn_hana.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/sap.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/spn.json create mode 100644 test/json/classic-ScaleOut-BW/angi-ScaleOut/ssn.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/bmt.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/defaultChecks+newComparators.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/defaultChecks.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/free_log_area.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_indexserver.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_inst.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_node.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_worker_indexserver.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_worker_inst.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_worker_node.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_indexserver.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_inst.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_node.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_worker_inst.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_worker_node.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/maintenance_cluster_turn_hana.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/nop-false.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/nop.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/properties.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/properties_ha1_hdb10.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/restart_cluster.json create mode 100644 
test/json/classic-ScaleOut/angi-ScaleOut/restart_cluster_hana_running.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/restart_cluster_turn_hana.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/sap.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/spn.json create mode 100644 test/json/classic-ScaleOut/angi-ScaleOut/ssn.json diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/bmt.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/bmt.json new file mode 100644 index 00000000..ec682537 --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/bmt.json @@ -0,0 +1,42 @@ +{ + "test": "bmt", + "name": "blocked manual takeover", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "bmt", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "test prerequitsites", + "next": "step40", + "loop": 1, + "wait": 1, + "post": "sleep 120", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step40", + "name": "still running", + "next": "END", + "loop": 1, + "wait": 1, + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/defaultChecks+newComparators.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/defaultChecks+newComparators.json new file mode 100644 index 00000000..44699c70 --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/defaultChecks+newComparators.json @@ -0,0 +1,66 @@ +{ + "checkPtr": { + "comparartorinline": [ + "alfa!=dassollungleichsein", + "lpa_@@sid@@_lpt > 160000", + "beta=dassollgleichsein" + ], + "comparatortuple": [ + ("noty", "alfa=ungleich"), + () + ], + "globalUp": [ + "topology=ScaleOut" + ], + "pHostUp": [ + "clone_state=PROMOTED", + "roles=master1:master:worker:master", + "score=150" + ], + "pSiteUp": [ + "lpt=1[6-9]........", + "lss=4", + "srr=P", + "srHook=PRIM", + "srPoll=PRIM" + ], + "sSiteUp": [ + "lpt=30", + "lss=4", + "srr=S", + "srHook=SOK", + "srPoll=SOK" + ], + "sHostUp": [ + "clone_state=DEMOTED", + "roles=master1:master:worker:master", + "score=100" + ], + "pHostDown": [ + "clone_state=UNDEFINED" , + "roles=master1::worker:" , + "score=150" , + "standby=on" + ], + "pSiteDown": [ + "lpt=1[6-9]........" 
, + "lss=1" , + "srr=P" , + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSiteDown": [ + "lpt=10", + "lss=1", + "srr=S", + "srHook=SFAIL", + "srPoll=SFAIL" + ], + "sHostDown": [ + "clone_state=UNDEFINED" , + "roles=master1::worker:" , + "score=100" , + "standby=on" + ] + } +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/defaultChecks.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/defaultChecks.json new file mode 100644 index 00000000..7e18eb7c --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/defaultChecks.json @@ -0,0 +1,57 @@ +{ + "checkPtr": { + "globalUp": [ + "topology=ScaleOut" + ], + "pHostUp": [ + "clone_state=PROMOTED", + "roles=master1:master:worker:master", + "score=150" + ], + "pSiteUp": [ + "lpt=1[6-9]........", + "lss=4", + "srr=P", + "srHook=PRIM", + "srPoll=PRIM" + ], + "sSiteUp": [ + "lpt=30", + "lss=4", + "srr=S", + "srHook=SOK", + "srPoll=SOK" + ], + "sHostUp": [ + "clone_state=DEMOTED", + "roles=master1:master:worker:master", + "score=100" + ], + "pHostDown": [ + "clone_state=UNDEFINED" , + "roles=master1::worker:" , + "score=150" , + "standby=on" + ], + "pSiteDown": [ + "lpt=1[6-9]........" , + "lss=1" , + "srr=P" , + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSiteDown": [ + "lpt=10", + "lss=1", + "srr=S", + "srHook=SFAIL", + "srPoll=SFAIL" + ], + "sHostDown": [ + "clone_state=UNDEFINED" , + "roles=master1::worker:" , + "score=100" , + "standby=on" + ] + } +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/free_log_area.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/free_log_area.json new file mode 100644 index 00000000..8a664118 --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/free_log_area.json @@ -0,0 +1,42 @@ +{ + "test": "free_log_area", + "name": "free log area on primary", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "shell test_free_log_area", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "still running", + "next": "step40", + "loop": 1, + "wait": 1, + "post": "sleep 60", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step40", + "name": "still running", + "next": "END", + "loop": 1, + "wait": 1, + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_indexserver.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_indexserver.json new file mode 100644 index 00000000..31337a39 --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_indexserver.json @@ -0,0 +1,97 @@ +{ + "test": "kill_prim_indexserver", + "name": "Kill primary indexserver", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_prim_indexserver", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "comment": "sSite: srPoll could get SFAIL on scale-out", + "pSite": [ + "lss=(1|2)" , + "srr=P" , + "lpt=(1[6-9]........|20)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=S", + "srHook=(PRIM|SOK)", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + 
"clone_state=(PROMOTED|DEMOTED|UNDEFINED)" , + "roles=master1::worker:" , + "score=(90|70|5|0)" + ], + "sHost": [ + "clone_state=(PROMOTED|DEMOTED)" , + "roles=master1:master:worker:master" , + "score=(100|145)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "lss=1" , + "srr=P" , + "lpt=(1[6-9]........|30|20|10)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=(S|P)", + "srHook=PRIM", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + "clone_state=(UNDEFINED|DEMOTED)" , + "roles=master1::worker:" , + "score=(90|70|5)" + ], + "sHost": [ + "clone_state=(DEMOTED|PROMOTED)" , + "roles=master1:master:worker:master" , + "score=(100|145)" , + "srah=T" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 360, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_inst.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_inst.json new file mode 100644 index 00000000..838f026e --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_inst.json @@ -0,0 +1,99 @@ +{ + "test": "kill_prim_inst", + "name": "Kill primary instance", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_prim_inst", + "todo": "allow something like pSite=@@pSite@@ or pSite=%pSite", + "todo1": "allow something like lss>2, lpt>10000, score!=123", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "comment": "sSite: srPoll could get SFAIL on scale-out", + "pSite": [ + "lss=(1|2)" , + "srr=P" , + "lpt=(1[6-9]........|20)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=S", + "srHook=(PRIM|SOK)", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + "clone_state=(PROMOTED|DEMOTED|UNDEFINED)" , + "roles=master1::worker:" , + "score=(90|70|5|0)" + ], + "sHost": [ + "clone_state=(PROMOTED|DEMOTED)" , + "roles=master1:master:worker:master" , + "score=(100|145)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "lss=1" , + "srr=P" , + "lpt=(1[6-9]........|30|20|10)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=(S|P)", + "srHook=PRIM", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + "clone_state=(UNDEFINED|DEMOTED)" , + "roles=master1::worker:" , + "score=(90|70|5)" + ], + "sHost": [ + "clone_state=(DEMOTED|PROMOTED)" , + "roles=master1:master:worker:master" , + "score=(100|145)" , + "srah=T" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 360, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_node.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_node.json new file mode 100644 index 00000000..d699c9e0 --- 
/dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_node.json @@ -0,0 +1,91 @@ +{ + "test": "kill_prim_node", + "name": "Kill primary master node", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_prim_node", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": [ + "lss=1" , + "srr=P" , + "lpt=(1[6-9]........|20|10)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=(S|P)", + "srHook=(PRIM|SOK)", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + ], + "sHost": [ + "clone_state=(PROMOTED|DEMOTED)", + "roles=master1:master:worker:master" , + "score=(100|145)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 300, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "lss=(1|2)", + "srr=(P|S)" , + "lpt=(1[6-9]........|30|20|10)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=(PRIM|SFAIL)" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=(S|P)", + "srHook=PRIM", + "srPoll=(SOK|PRIM)" + ], + "pHost": [ + "clone_state=(UNDEFINED|DEMOTED|WAITING4NODES)" , + "roles=master1::worker:" + ], + "sHost": [ + "clone_state=(DEMOTED|PROMOTED)" , + "roles=master1:master:worker:master" , + "score=(100|145|150)" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 300, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_worker_indexserver.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_worker_indexserver.json new file mode 100644 index 00000000..ee4d17f0 --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_worker_indexserver.json @@ -0,0 +1,95 @@ +{ + "test": "kill_prim_worker_indexserver", + "name": "Kill primary worker indexserver", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_prim_worker_indexserver", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": [ + "lss=(1|2)" , + "srr=P" , + "lpt=(1[6-9]........|20)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=S", + "srHook=(PRIM|SOK)", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + "clone_state=(PROMOTED|DEMOTED|UNDEFINED)" , + "score=(90|70|5|0)" + ], + "sHost": [ + "clone_state=(PROMOTED|DEMOTED)", + "roles=master1:master:worker:master" , + "score=(100|145)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "todo2": "why do we need SFAIL for srHook?", + "pSite": [ + "lss=1" , + "srr=P" , + "lpt=(1[6-9]........|30|20|10)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=(S|P)", + "srHook=(PRIM|SFAIL)", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + "clone_state=(UNDEFINED|DEMOTED)" , + "score=(90|70|5)" + ], + "sHost": [ + 
"clone_state=(DEMOTED|PROMOTED)" , + "roles=master1:master:worker:master" , + "score=(100|145)" , + "srah=T" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 360, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_worker_inst.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_worker_inst.json new file mode 100644 index 00000000..59e6d205 --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_worker_inst.json @@ -0,0 +1,97 @@ +{ + "test": "kill_prim_worker_inst", + "name": "Kill primary worker instance", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_prim_worker_inst", + "todo": "allow something like pSite=@@pSite@@ or pSite=%pSite", + "todo1": "allow something like lss>2, lpt>10000, score!=123", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": [ + "lss=(1|2)" , + "srr=P" , + "lpt=(1[6-9]........|20)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=S", + "srHook=(PRIM|SOK)", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + "clone_state=(PROMOTED|DEMOTED|UNDEFINED)" , + "score=(90|70|5|0)" + ], + "sHost": [ + "clone_state=(PROMOTED|DEMOTED)", + "roles=master1:master:worker:master" , + "score=(100|145)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "lss=1" , + "srr=P" , + "lpt=(1[6-9]........|30|20|10)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=(S|P)", + "srHook=PRIM", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + "clone_state=(UNDEFINED|DEMOTED)" , + "roles=master1::worker:", + "score=(90|70|5)" + ], + "sHost": [ + "clone_state=(DEMOTED|PROMOTED)" , + "roles=master1:master:worker:master" , + "score=(100|145)" , + "srah=T" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 300, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_worker_node.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_worker_node.json new file mode 100644 index 00000000..e2256cd8 --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_prim_worker_node.json @@ -0,0 +1,93 @@ +{ + "test": "kill_prim_worker_node", + "name": "Kill primary worker node", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_prim_worker_node", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": [ + "lss=1" , + "srr=P" , + "lpt=(1[6-9]........|20|10)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=(S|P)", + "srHook=(PRIM|SOK)", + 
"srPoll=(SOK|SFAIL)" + ], + "pHost": [ + "clone_state=(DEMOTED|UNDEFINED|WAITING4NODES)" , + "score=(90|70|5)" + ], + "sHost": [ + "clone_state=(PROMOTED|DEMOTED)", + "roles=master1:master:worker:master" , + "score=(100|145)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 240, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "lss=(1|2)", + "srr=(P|S)" , + "lpt=(1[6-9]........|30|20|10)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=(PRIM|SFAIL)" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=(S|P)", + "srHook=PRIM", + "srPoll=(SOK|PRIM)" + ], + "pHost": [ + "clone_state=(UNDEFINED|DEMOTED|WAITING4NODES)" , + "roles=master1::worker:" + ], + "sHost": [ + "clone_state=(DEMOTED|PROMOTED)" , + "roles=master1:master:worker:master" , + "score=(100|145|150)" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 300, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_indexserver.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_indexserver.json new file mode 100644 index 00000000..409b37d3 --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_indexserver.json @@ -0,0 +1,95 @@ +{ + "test": "kill_secn_indexserver", + "name": "Kill secondary indexserver", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_secn_indexserver", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": [ + "lss=4" , + "srr=P" , + "lpt=1[6-9]........" , + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(10|30)", + "lss=(1|2)", + "srr=S", + "srHook=SFAIL", + "srPoll=(SFAIL|SOK)" + ], + "pHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=150" + ], + "sHost": [ + "clone_state=DEMOTED" , + "roles=master1::worker:" , + "score=(-INFINITY|0)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "lss=4" , + "srr=P" , + "lpt=1[6-9]........" 
, + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=10", + "lss=1", + "srr=S", + "srHook=SFAIL", + "srPoll=(SFAIL|SOK)" + ], + "pHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=150" + ], + "sHost": [ + "clone_state=UNDEFINED" , + "roles=master1::worker:" , + "score=(-INFINITY|0|-1)" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sCCC to be the same as at test begin", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_inst.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_inst.json new file mode 100644 index 00000000..95f2de32 --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_inst.json @@ -0,0 +1,94 @@ +{ + "test": "kill_secn_inst", + "name": "Kill secondary instance", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_secn_inst", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": [ + "lss=4" , + "srr=P" , + "lpt=1[6-9]........" , + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(10|30)", + "lss=(1|2)", + "srr=S", + "srHook=SFAIL", + "srPoll=(SFAIL|SOK)" + ], + "pHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=150" + ], + "sHost": [ + "clone_state=DEMOTED" , + "roles=master1::worker:" , + "score=(-INFINITY|0)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "lss=4" , + "srr=P" , + "lpt=1[6-9]........" , + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=10", + "lss=(1|2)", + "srr=S", + "srHook=(SFAIL|SWAIT)", + "srPoll=(SFAIL|SOK)" + ], + "pHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=150" + ], + "sHost": [ + "clone_state=(UNDEFINED|DEMOTED)" , + "roles=master1::worker:" , + "score=(-INFINITY|0|-1)" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 240, + "wait": 2, + "post": "cleanup", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_node.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_node.json new file mode 100644 index 00000000..0313c539 --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_node.json @@ -0,0 +1,89 @@ +{ + "test": "kill_secn_node", + "name": "Kill secondary master node", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_secn_node", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": [ + "lss=4" , + "srr=P" , + "lpt=1[6-9]........" 
, + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=10", + "lss=1", + "srr=S", + "srHook=SFAIL", + "srPoll=SFAIL" + ], + "pHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=150" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "lss=4" , + "srr=P" , + "lpt=1[6-9]........" , + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=10", + "lss=(1|2)", + "srr=S", + "srHook=(SFAIL|SWAIT)", + "srPoll=(SFAIL|SOK)" + ], + "pHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=150" + ], + "sHost": [ + "clone_state=(UNDEFINED|DEMOTED)" , + "roles=master1::worker:" , + "score=(-INFINITY|0|-1)" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_worker_inst.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_worker_inst.json new file mode 100644 index 00000000..c75d63f9 --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_worker_inst.json @@ -0,0 +1,74 @@ +{ + "test": "kill_secn_worker_inst", + "name": "Kill secondary worker instance", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_secn_worker_inst", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": "pSiteUp", + "sSite": [ + "lpt=(10|30)", + "lss=(1|2)", + "srr=S", + "srHook=(SFAIL|SWAIT)", + "srPoll=(SFAIL|SOK)" + ], + "pHost": "pHostUp", + "sHost": [ + "clone_state=(DEMOTED|UNDEFINED)" , + "roles=master1::worker:" , + "score=(-INFINITY|0)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": "pSiteUp", + "sSite": [ + "lpt=10", + "lss=(1|2)", + "srr=S", + "srHook=(SFAIL|SWAIT)", + "srPoll=(SFAIL|SOK)" + ], + "pHost": "pHostUp", + "sHost": [ + "clone_state=(UNDEFINED|DEMOTED)" , + "roles=master1::worker:" , + "score=(-INFINITY|0|-1)" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_worker_node.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_worker_node.json new file mode 100644 index 00000000..bfb3122f --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/kill_secn_worker_node.json @@ -0,0 +1,72 @@ +{ + "test": "kill_secn_worker_node", + "name": "Kill secondary worker node", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_secn_worker_node", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": "pSiteUp", + "sSite": [ + "lpt=10", + "lss=1", + "srr=S", + "srHook=SFAIL", + "srPoll=SFAIL" + ], + "pHost": "pHostUp", + "sHost": 
[ + "clone_state=WAITING4NODES" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": "pSiteUp", + "sSite": [ + "lpt=10", + "lss=(1|2)", + "srr=S", + "srHook=(SFAIL|SWAIT)", + "srPoll=(SFAIL|SOK)" + ], + "pHost": "pHostUp", + "sHost": [ + "clone_state=(UNDEFINED|DEMOTED)" , + "roles=master1::worker:" , + "score=(-INFINITY|0|-1)" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/maintenance_cluster_turn_hana.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/maintenance_cluster_turn_hana.json new file mode 100644 index 00000000..c9b9fe7f --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/maintenance_cluster_turn_hana.json @@ -0,0 +1,32 @@ +{ + "test": "maintenance_cluster_turn_hana", + "name": "maintenance_cluster_turn_hana", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step40", + "loop": 1, + "wait": 1, + "post": "shell test_maintenance_cluster_turn_hana", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/nop-false.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/nop-false.json new file mode 100644 index 00000000..b8127a4e --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/nop-false.json @@ -0,0 +1,33 @@ +{ + "test": "nop", + "name": "no operation - check, wait and check again (stability check)", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step40", + "loop": 1, + "wait": 1, + "post": "sleep 240", + "global": [ + "topology=Nix" + ], + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step40", + "name": "still running", + "next": "END", + "loop": 1, + "wait": 1, + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/nop.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/nop.json new file mode 100644 index 00000000..705f885a --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/nop.json @@ -0,0 +1,31 @@ +{ + "test": "nop", + "name": "no operation - check, wait and check again (stability check)", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step40", + "loop": 1, + "wait": 1, + "post": "sleep 240", + "global": "globalUp", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step40", + "name": "still running", + "next": "END", + "loop": 1, + "wait": 1, + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/properties.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/properties.json new file mode 100644 index 00000000..7f71d029 --- /dev/null +++ 
b/test/json/classic-ScaleOut-BW/angi-ScaleOut/properties.json @@ -0,0 +1,6 @@ +{ + "sid": "HA1", + "instNo": "10", + "mstResource": "mst_SAPHanaCon_HA1_HDB10", + "clnResource": "cln_SAPHanaTop_HA1_HDB10" +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/properties_ha1_hdb10.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/properties_ha1_hdb10.json new file mode 100644 index 00000000..8da3b39e --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/properties_ha1_hdb10.json @@ -0,0 +1,5 @@ +{ + "sid": "HA1", + "mstResource": "mst_SAPHanaCon_HA1_HDB10", + "clnResource": "cln_SAPHanaTop_HA1_HDB10" +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/restart_cluster.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/restart_cluster.json new file mode 100644 index 00000000..e2ab6a03 --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/restart_cluster.json @@ -0,0 +1,31 @@ +{ + "test": "restart_cluster", + "name": "restart_cluster", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step40", + "loop": 1, + "wait": 1, + "post": "shell test_restart_cluster", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/restart_cluster_hana_running.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/restart_cluster_hana_running.json new file mode 100644 index 00000000..feaaa704 --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/restart_cluster_hana_running.json @@ -0,0 +1,31 @@ +{ + "test": "restart_cluster_hana_running", + "name": "restart_cluster_hana_running", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step40", + "loop": 1, + "wait": 1, + "post": "shell test_restart_cluster_hana_running", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/restart_cluster_turn_hana.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/restart_cluster_turn_hana.json new file mode 100644 index 00000000..5f1400b4 --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/restart_cluster_turn_hana.json @@ -0,0 +1,32 @@ +{ + "test": "restart_cluster_turn_hana", + "name": "restart_cluster_turn_hana", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step40", + "loop": 1, + "wait": 1, + "post": "shell test_restart_cluster_turn_hana", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/sap.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/sap.json new file mode 100644 index 00000000..e4dcb21f --- /dev/null +++ 
b/test/json/classic-ScaleOut-BW/angi-ScaleOut/sap.json @@ -0,0 +1,136 @@ +{ + "test": "sap", + "name": "standby+online secondary then standby+online primary", + "start": "step10", + "sid": "HA1", + "mstResource": "ms_SAPHanaCon_HA1_HDB00", + "todo": "expectations needs to be fixed - e.g. step20 sHostDown is wrong, because topology will also be stopped. roles will be ::: not master1:...", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites ssn", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "ssn", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "secondary site: node is standby", + "next": "step30", + "loop": 120, + "wait": 2, + "post": "osn", + "pSite": "pSiteUp", + "sSite": "sSiteDown", + "pHost": "pHostUp", + "sHost": "sHostDown" + }, + { + "step": "step30", + "name": "secondary site: node back online", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": "pSiteUp", + "sSite": [ + "lpt=10", + "lss=1", + "srr=S", + "srHook=SWAIT", + "srPoll=SFAIL" + ], + "pHost": "pHostUp", + "sHost": [ + "clone_state=DEMOTED" , + "roles=master1::worker:" , + "score=(-INFINITY|0)" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "step110", + "loop": 120, + "wait": 2, + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step110", + "name": "test prerequitsites spn", + "next": "step120", + "loop": 1, + "wait": 1, + "post": "spn", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step120", + "name": "primary site: node is standby", + "next": "step130", + "loop": 120, + "wait": 2, + "pSite": "pSiteDown", + "sSite": [ + "lpt=(30|1[6-9]........)", + "lss=4", + "srr=S", + "srHook=(PRIM|SOK)", + "srPoll=SOK" + ], + "pHost": "pHostDown", + "sHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=(100|145)" + ] + }, + { + "step": "step130", + "name": "takeover on secondary", + "next": "step140", + "loop": 120, + "post": "opn", + "wait": 2, + "pSite": [ + "lss=1" , + "srr=P" , + "lpt=10" , + "srHook=SWAIT" , + "srPoll=SFAIL" + ], + "sSite": "pSiteUp", + "pHost": [ + "clone_state=UNDEFINED" , + "roles=master1::worker:" , + "score=150" , + "standby=on" + ], + "sHost": "pHostUp" + }, + { + "step": "step140", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/spn.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/spn.json new file mode 100644 index 00000000..a5ed1730 --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/spn.json @@ -0,0 +1,97 @@ +{ + "test": "spn", + "name": "standby secondary node (and online again)", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "spn", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "node is standby", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": [ + "lss=1" , + "srr=P" , + "lpt=1[6-9]........" 
, + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(30|1[6-9]........)", + "lss=4", + "srr=S", + "srHook=(PRIM|SOK)", + "srPoll=SOK" + ], + "pHost": [ + "clone_state=UNDEFINED" , + "roles=master1::worker:" , + "score=150" , + "standby=on" + ], + "sHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=(100|145)" + ] + }, + { + "step": "step30", + "name": "takeover on secondary", + "next": "step40", + "loop": 120, + "post": "opn", + "wait": 2, + "pSite": [ + "lss=1" , + "srr=P" , + "lpt=10" , + "srHook=SWAIT" , + "srPoll=SFAIL" + ], + "sSite": [ + "lpt=1[6-9]........", + "lss=4", + "srr=P", + "srHook=PRIM", + "srPoll=PRIM" + ], + "pHost": [ + "clone_state=UNDEFINED" , + "roles=master1::worker:" , + "score=150" , + "standby=on" + ], + "sHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=150" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "todo": "allow pointer to step10", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut-BW/angi-ScaleOut/ssn.json b/test/json/classic-ScaleOut-BW/angi-ScaleOut/ssn.json new file mode 100644 index 00000000..137f7176 --- /dev/null +++ b/test/json/classic-ScaleOut-BW/angi-ScaleOut/ssn.json @@ -0,0 +1,98 @@ +{ + "test": "ssn", + "name": "standby secondary node (and online again)", + "start": "step10", + "sid": "HA1", + "mstResource": "ms_SAPHanaCon_HA1_HDB00", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "ssn", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "node is standby", + "next": "step30", + "loop": 120, + "wait": 2, + "post": "osn", + "pSite": [ + "lss=4" , + "srr=P" , + "lpt=1[6-9]........" , + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=10", + "lss=1", + "srr=S", + "srHook=SFAIL", + "srPoll=SFAIL" + ], + "pHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=150" + ], + "sHost": [ + "clone_state=UNDEFINED" , + "roles=master1::worker:" , + "score=100" , + "standby=on" + ] + }, + { + "step": "step30", + "name": "node back online", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "lss=4" , + "srr=P" , + "lpt=1[6-9]........" 
, + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=10", + "lss=1", + "srr=S", + "srHook=SWAIT", + "srPoll=SFAIL" + ], + "pHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=150" + ], + "sHost": [ + "clone_state=DEMOTED" , + "roles=master1::worker:" , + "score=(-INFINITY|0)" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/bmt.json b/test/json/classic-ScaleOut/angi-ScaleOut/bmt.json new file mode 100644 index 00000000..ec682537 --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/bmt.json @@ -0,0 +1,42 @@ +{ + "test": "bmt", + "name": "blocked manual takeover", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "bmt", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "test prerequitsites", + "next": "step40", + "loop": 1, + "wait": 1, + "post": "sleep 120", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step40", + "name": "still running", + "next": "END", + "loop": 1, + "wait": 1, + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/defaultChecks+newComparators.json b/test/json/classic-ScaleOut/angi-ScaleOut/defaultChecks+newComparators.json new file mode 100644 index 00000000..44699c70 --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/defaultChecks+newComparators.json @@ -0,0 +1,66 @@ +{ + "checkPtr": { + "comparartorinline": [ + "alfa!=dassollungleichsein", + "lpa_@@sid@@_lpt > 160000", + "beta=dassollgleichsein" + ], + "comparatortuple": [ + ("noty", "alfa=ungleich"), + () + ], + "globalUp": [ + "topology=ScaleOut" + ], + "pHostUp": [ + "clone_state=PROMOTED", + "roles=master1:master:worker:master", + "score=150" + ], + "pSiteUp": [ + "lpt=1[6-9]........", + "lss=4", + "srr=P", + "srHook=PRIM", + "srPoll=PRIM" + ], + "sSiteUp": [ + "lpt=30", + "lss=4", + "srr=S", + "srHook=SOK", + "srPoll=SOK" + ], + "sHostUp": [ + "clone_state=DEMOTED", + "roles=master1:master:worker:master", + "score=100" + ], + "pHostDown": [ + "clone_state=UNDEFINED" , + "roles=master1::worker:" , + "score=150" , + "standby=on" + ], + "pSiteDown": [ + "lpt=1[6-9]........" 
, + "lss=1" , + "srr=P" , + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSiteDown": [ + "lpt=10", + "lss=1", + "srr=S", + "srHook=SFAIL", + "srPoll=SFAIL" + ], + "sHostDown": [ + "clone_state=UNDEFINED" , + "roles=master1::worker:" , + "score=100" , + "standby=on" + ] + } +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/defaultChecks.json b/test/json/classic-ScaleOut/angi-ScaleOut/defaultChecks.json new file mode 100644 index 00000000..7e18eb7c --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/defaultChecks.json @@ -0,0 +1,57 @@ +{ + "checkPtr": { + "globalUp": [ + "topology=ScaleOut" + ], + "pHostUp": [ + "clone_state=PROMOTED", + "roles=master1:master:worker:master", + "score=150" + ], + "pSiteUp": [ + "lpt=1[6-9]........", + "lss=4", + "srr=P", + "srHook=PRIM", + "srPoll=PRIM" + ], + "sSiteUp": [ + "lpt=30", + "lss=4", + "srr=S", + "srHook=SOK", + "srPoll=SOK" + ], + "sHostUp": [ + "clone_state=DEMOTED", + "roles=master1:master:worker:master", + "score=100" + ], + "pHostDown": [ + "clone_state=UNDEFINED" , + "roles=master1::worker:" , + "score=150" , + "standby=on" + ], + "pSiteDown": [ + "lpt=1[6-9]........" , + "lss=1" , + "srr=P" , + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSiteDown": [ + "lpt=10", + "lss=1", + "srr=S", + "srHook=SFAIL", + "srPoll=SFAIL" + ], + "sHostDown": [ + "clone_state=UNDEFINED" , + "roles=master1::worker:" , + "score=100" , + "standby=on" + ] + } +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/free_log_area.json b/test/json/classic-ScaleOut/angi-ScaleOut/free_log_area.json new file mode 100644 index 00000000..8a664118 --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/free_log_area.json @@ -0,0 +1,42 @@ +{ + "test": "free_log_area", + "name": "free log area on primary", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "shell test_free_log_area", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "still running", + "next": "step40", + "loop": 1, + "wait": 1, + "post": "sleep 60", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step40", + "name": "still running", + "next": "END", + "loop": 1, + "wait": 1, + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_indexserver.json b/test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_indexserver.json new file mode 100644 index 00000000..31337a39 --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_indexserver.json @@ -0,0 +1,97 @@ +{ + "test": "kill_prim_indexserver", + "name": "Kill primary indexserver", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_prim_indexserver", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "comment": "sSite: srPoll could get SFAIL on scale-out", + "pSite": [ + "lss=(1|2)" , + "srr=P" , + "lpt=(1[6-9]........|20)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=S", + "srHook=(PRIM|SOK)", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + "clone_state=(PROMOTED|DEMOTED|UNDEFINED)" , + 
"roles=master1::worker:" , + "score=(90|70|5|0)" + ], + "sHost": [ + "clone_state=(PROMOTED|DEMOTED)" , + "roles=master1:master:worker:master" , + "score=(100|145)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "lss=1" , + "srr=P" , + "lpt=(1[6-9]........|30|20|10)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=(S|P)", + "srHook=PRIM", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + "clone_state=(UNDEFINED|DEMOTED)" , + "roles=master1::worker:" , + "score=(90|70|5)" + ], + "sHost": [ + "clone_state=(DEMOTED|PROMOTED)" , + "roles=master1:master:worker:master" , + "score=(100|145)" , + "srah=T" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 360, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_inst.json b/test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_inst.json new file mode 100644 index 00000000..838f026e --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_inst.json @@ -0,0 +1,99 @@ +{ + "test": "kill_prim_inst", + "name": "Kill primary instance", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_prim_inst", + "todo": "allow something like pSite=@@pSite@@ or pSite=%pSite", + "todo1": "allow something like lss>2, lpt>10000, score!=123", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "comment": "sSite: srPoll could get SFAIL on scale-out", + "pSite": [ + "lss=(1|2)" , + "srr=P" , + "lpt=(1[6-9]........|20)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=S", + "srHook=(PRIM|SOK)", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + "clone_state=(PROMOTED|DEMOTED|UNDEFINED)" , + "roles=master1::worker:" , + "score=(90|70|5|0)" + ], + "sHost": [ + "clone_state=(PROMOTED|DEMOTED)" , + "roles=master1:master:worker:master" , + "score=(100|145)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "lss=1" , + "srr=P" , + "lpt=(1[6-9]........|30|20|10)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=(S|P)", + "srHook=PRIM", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + "clone_state=(UNDEFINED|DEMOTED)" , + "roles=master1::worker:" , + "score=(90|70|5)" + ], + "sHost": [ + "clone_state=(DEMOTED|PROMOTED)" , + "roles=master1:master:worker:master" , + "score=(100|145)" , + "srah=T" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 360, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_node.json b/test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_node.json new file mode 100644 index 00000000..d699c9e0 --- /dev/null +++ 
b/test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_node.json @@ -0,0 +1,91 @@ +{ + "test": "kill_prim_node", + "name": "Kill primary master node", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_prim_node", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": [ + "lss=1" , + "srr=P" , + "lpt=(1[6-9]........|20|10)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=(S|P)", + "srHook=(PRIM|SOK)", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + ], + "sHost": [ + "clone_state=(PROMOTED|DEMOTED)", + "roles=master1:master:worker:master" , + "score=(100|145)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 300, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "lss=(1|2)", + "srr=(P|S)" , + "lpt=(1[6-9]........|30|20|10)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=(PRIM|SFAIL)" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=(S|P)", + "srHook=PRIM", + "srPoll=(SOK|PRIM)" + ], + "pHost": [ + "clone_state=(UNDEFINED|DEMOTED|WAITING4NODES)" , + "roles=master1::worker:" + ], + "sHost": [ + "clone_state=(DEMOTED|PROMOTED)" , + "roles=master1:master:worker:master" , + "score=(100|145|150)" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 300, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_worker_indexserver.json b/test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_worker_indexserver.json new file mode 100644 index 00000000..ee4d17f0 --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_worker_indexserver.json @@ -0,0 +1,95 @@ +{ + "test": "kill_prim_worker_indexserver", + "name": "Kill primary worker indexserver", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_prim_worker_indexserver", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": [ + "lss=(1|2)" , + "srr=P" , + "lpt=(1[6-9]........|20)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=S", + "srHook=(PRIM|SOK)", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + "clone_state=(PROMOTED|DEMOTED|UNDEFINED)" , + "score=(90|70|5|0)" + ], + "sHost": [ + "clone_state=(PROMOTED|DEMOTED)", + "roles=master1:master:worker:master" , + "score=(100|145)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "todo2": "why do we need SFAIL for srHook?", + "pSite": [ + "lss=1" , + "srr=P" , + "lpt=(1[6-9]........|30|20|10)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=(S|P)", + "srHook=(PRIM|SFAIL)", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + "clone_state=(UNDEFINED|DEMOTED)" , + "score=(90|70|5)" + ], + "sHost": [ + "clone_state=(DEMOTED|PROMOTED)" , 
+ "roles=master1:master:worker:master" , + "score=(100|145)" , + "srah=T" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 360, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_worker_inst.json b/test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_worker_inst.json new file mode 100644 index 00000000..59e6d205 --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_worker_inst.json @@ -0,0 +1,97 @@ +{ + "test": "kill_prim_worker_inst", + "name": "Kill primary worker instance", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_prim_worker_inst", + "todo": "allow something like pSite=@@pSite@@ or pSite=%pSite", + "todo1": "allow something like lss>2, lpt>10000, score!=123", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": [ + "lss=(1|2)" , + "srr=P" , + "lpt=(1[6-9]........|20)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=S", + "srHook=(PRIM|SOK)", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + "clone_state=(PROMOTED|DEMOTED|UNDEFINED)" , + "score=(90|70|5|0)" + ], + "sHost": [ + "clone_state=(PROMOTED|DEMOTED)", + "roles=master1:master:worker:master" , + "score=(100|145)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "lss=1" , + "srr=P" , + "lpt=(1[6-9]........|30|20|10)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=(S|P)", + "srHook=PRIM", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + "clone_state=(UNDEFINED|DEMOTED)" , + "roles=master1::worker:", + "score=(90|70|5)" + ], + "sHost": [ + "clone_state=(DEMOTED|PROMOTED)" , + "roles=master1:master:worker:master" , + "score=(100|145)" , + "srah=T" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 300, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_worker_node.json b/test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_worker_node.json new file mode 100644 index 00000000..e2256cd8 --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/kill_prim_worker_node.json @@ -0,0 +1,93 @@ +{ + "test": "kill_prim_worker_node", + "name": "Kill primary worker node", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_prim_worker_node", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": [ + "lss=1" , + "srr=P" , + "lpt=(1[6-9]........|20|10)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=(S|P)", + "srHook=(PRIM|SOK)", + "srPoll=(SOK|SFAIL)" + ], + "pHost": [ + 
"clone_state=(DEMOTED|UNDEFINED|WAITING4NODES)" , + "score=(90|70|5)" + ], + "sHost": [ + "clone_state=(PROMOTED|DEMOTED)", + "roles=master1:master:worker:master" , + "score=(100|145)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 240, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "lss=(1|2)", + "srr=(P|S)" , + "lpt=(1[6-9]........|30|20|10)" , + "srHook=(PRIM|SWAIT|SREG)" , + "srPoll=(PRIM|SFAIL)" + ], + "sSite": [ + "lpt=(1[6-9]........|30)", + "lss=4", + "srr=(S|P)", + "srHook=PRIM", + "srPoll=(SOK|PRIM)" + ], + "pHost": [ + "clone_state=(UNDEFINED|DEMOTED|WAITING4NODES)" , + "roles=master1::worker:" + ], + "sHost": [ + "clone_state=(DEMOTED|PROMOTED)" , + "roles=master1:master:worker:master" , + "score=(100|145|150)" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 300, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_indexserver.json b/test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_indexserver.json new file mode 100644 index 00000000..409b37d3 --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_indexserver.json @@ -0,0 +1,95 @@ +{ + "test": "kill_secn_indexserver", + "name": "Kill secondary indexserver", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_secn_indexserver", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": [ + "lss=4" , + "srr=P" , + "lpt=1[6-9]........" , + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(10|30)", + "lss=(1|2)", + "srr=S", + "srHook=SFAIL", + "srPoll=(SFAIL|SOK)" + ], + "pHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=150" + ], + "sHost": [ + "clone_state=DEMOTED" , + "roles=master1::worker:" , + "score=(-INFINITY|0)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "lss=4" , + "srr=P" , + "lpt=1[6-9]........" 
, + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=10", + "lss=1", + "srr=S", + "srHook=SFAIL", + "srPoll=(SFAIL|SOK)" + ], + "pHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=150" + ], + "sHost": [ + "clone_state=UNDEFINED" , + "roles=master1::worker:" , + "score=(-INFINITY|0|-1)" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sCCC to be the same as at test begin", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_inst.json b/test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_inst.json new file mode 100644 index 00000000..95f2de32 --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_inst.json @@ -0,0 +1,94 @@ +{ + "test": "kill_secn_inst", + "name": "Kill secondary instance", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_secn_inst", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": [ + "lss=4" , + "srr=P" , + "lpt=1[6-9]........" , + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(10|30)", + "lss=(1|2)", + "srr=S", + "srHook=SFAIL", + "srPoll=(SFAIL|SOK)" + ], + "pHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=150" + ], + "sHost": [ + "clone_state=DEMOTED" , + "roles=master1::worker:" , + "score=(-INFINITY|0)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "lss=4" , + "srr=P" , + "lpt=1[6-9]........" , + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=10", + "lss=(1|2)", + "srr=S", + "srHook=(SFAIL|SWAIT)", + "srPoll=(SFAIL|SOK)" + ], + "pHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=150" + ], + "sHost": [ + "clone_state=(UNDEFINED|DEMOTED)" , + "roles=master1::worker:" , + "score=(-INFINITY|0|-1)" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 240, + "wait": 2, + "post": "cleanup", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_node.json b/test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_node.json new file mode 100644 index 00000000..0313c539 --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_node.json @@ -0,0 +1,89 @@ +{ + "test": "kill_secn_node", + "name": "Kill secondary master node", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_secn_node", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": [ + "lss=4" , + "srr=P" , + "lpt=1[6-9]........" 
, + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=10", + "lss=1", + "srr=S", + "srHook=SFAIL", + "srPoll=SFAIL" + ], + "pHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=150" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "lss=4" , + "srr=P" , + "lpt=1[6-9]........" , + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=10", + "lss=(1|2)", + "srr=S", + "srHook=(SFAIL|SWAIT)", + "srPoll=(SFAIL|SOK)" + ], + "pHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=150" + ], + "sHost": [ + "clone_state=(UNDEFINED|DEMOTED)" , + "roles=master1::worker:" , + "score=(-INFINITY|0|-1)" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_worker_inst.json b/test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_worker_inst.json new file mode 100644 index 00000000..c75d63f9 --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_worker_inst.json @@ -0,0 +1,74 @@ +{ + "test": "kill_secn_worker_inst", + "name": "Kill secondary worker instance", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_secn_worker_inst", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": "pSiteUp", + "sSite": [ + "lpt=(10|30)", + "lss=(1|2)", + "srr=S", + "srHook=(SFAIL|SWAIT)", + "srPoll=(SFAIL|SOK)" + ], + "pHost": "pHostUp", + "sHost": [ + "clone_state=(DEMOTED|UNDEFINED)" , + "roles=master1::worker:" , + "score=(-INFINITY|0)" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": "pSiteUp", + "sSite": [ + "lpt=10", + "lss=(1|2)", + "srr=S", + "srHook=(SFAIL|SWAIT)", + "srPoll=(SFAIL|SOK)" + ], + "pHost": "pHostUp", + "sHost": [ + "clone_state=(UNDEFINED|DEMOTED)" , + "roles=master1::worker:" , + "score=(-INFINITY|0|-1)" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_worker_node.json b/test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_worker_node.json new file mode 100644 index 00000000..bfb3122f --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/kill_secn_worker_node.json @@ -0,0 +1,72 @@ +{ + "test": "kill_secn_worker_node", + "name": "Kill secondary worker node", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "kill_secn_worker_node", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "failure detected", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": "pSiteUp", + "sSite": [ + "lpt=10", + "lss=1", + "srr=S", + "srHook=SFAIL", + "srPoll=SFAIL" + ], + "pHost": "pHostUp", + "sHost": [ + 
"clone_state=WAITING4NODES" + ] + }, + { + "step": "step30", + "name": "begin recover", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": "pSiteUp", + "sSite": [ + "lpt=10", + "lss=(1|2)", + "srr=S", + "srHook=(SFAIL|SWAIT)", + "srPoll=(SFAIL|SOK)" + ], + "pHost": "pHostUp", + "sHost": [ + "clone_state=(UNDEFINED|DEMOTED)" , + "roles=master1::worker:" , + "score=(-INFINITY|0|-1)" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/maintenance_cluster_turn_hana.json b/test/json/classic-ScaleOut/angi-ScaleOut/maintenance_cluster_turn_hana.json new file mode 100644 index 00000000..c9b9fe7f --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/maintenance_cluster_turn_hana.json @@ -0,0 +1,32 @@ +{ + "test": "maintenance_cluster_turn_hana", + "name": "maintenance_cluster_turn_hana", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step40", + "loop": 1, + "wait": 1, + "post": "shell test_maintenance_cluster_turn_hana", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/nop-false.json b/test/json/classic-ScaleOut/angi-ScaleOut/nop-false.json new file mode 100644 index 00000000..b8127a4e --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/nop-false.json @@ -0,0 +1,33 @@ +{ + "test": "nop", + "name": "no operation - check, wait and check again (stability check)", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step40", + "loop": 1, + "wait": 1, + "post": "sleep 240", + "global": [ + "topology=Nix" + ], + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step40", + "name": "still running", + "next": "END", + "loop": 1, + "wait": 1, + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/nop.json b/test/json/classic-ScaleOut/angi-ScaleOut/nop.json new file mode 100644 index 00000000..705f885a --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/nop.json @@ -0,0 +1,31 @@ +{ + "test": "nop", + "name": "no operation - check, wait and check again (stability check)", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step40", + "loop": 1, + "wait": 1, + "post": "sleep 240", + "global": "globalUp", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step40", + "name": "still running", + "next": "END", + "loop": 1, + "wait": 1, + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/properties.json b/test/json/classic-ScaleOut/angi-ScaleOut/properties.json new file mode 100644 index 00000000..7f71d029 --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/properties.json @@ -0,0 +1,6 @@ +{ + "sid": "HA1", + 
"instNo": "10", + "mstResource": "mst_SAPHanaCon_HA1_HDB10", + "clnResource": "cln_SAPHanaTop_HA1_HDB10" +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/properties_ha1_hdb10.json b/test/json/classic-ScaleOut/angi-ScaleOut/properties_ha1_hdb10.json new file mode 100644 index 00000000..8da3b39e --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/properties_ha1_hdb10.json @@ -0,0 +1,5 @@ +{ + "sid": "HA1", + "mstResource": "mst_SAPHanaCon_HA1_HDB10", + "clnResource": "cln_SAPHanaTop_HA1_HDB10" +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/restart_cluster.json b/test/json/classic-ScaleOut/angi-ScaleOut/restart_cluster.json new file mode 100644 index 00000000..e2ab6a03 --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/restart_cluster.json @@ -0,0 +1,31 @@ +{ + "test": "restart_cluster", + "name": "restart_cluster", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step40", + "loop": 1, + "wait": 1, + "post": "shell test_restart_cluster", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/restart_cluster_hana_running.json b/test/json/classic-ScaleOut/angi-ScaleOut/restart_cluster_hana_running.json new file mode 100644 index 00000000..feaaa704 --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/restart_cluster_hana_running.json @@ -0,0 +1,31 @@ +{ + "test": "restart_cluster_hana_running", + "name": "restart_cluster_hana_running", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step40", + "loop": 1, + "wait": 1, + "post": "shell test_restart_cluster_hana_running", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/restart_cluster_turn_hana.json b/test/json/classic-ScaleOut/angi-ScaleOut/restart_cluster_turn_hana.json new file mode 100644 index 00000000..5f1400b4 --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/restart_cluster_turn_hana.json @@ -0,0 +1,32 @@ +{ + "test": "restart_cluster_turn_hana", + "name": "restart_cluster_turn_hana", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step40", + "loop": 1, + "wait": 1, + "post": "shell test_restart_cluster_turn_hana", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/sap.json b/test/json/classic-ScaleOut/angi-ScaleOut/sap.json new file mode 100644 index 00000000..e4dcb21f --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/sap.json @@ -0,0 +1,136 @@ +{ + "test": "sap", + "name": "standby+online secondary then standby+online primary", + "start": "step10", + "sid": "HA1", 
+ "mstResource": "ms_SAPHanaCon_HA1_HDB00", + "todo": "expectations needs to be fixed - e.g. step20 sHostDown is wrong, because topology will also be stopped. roles will be ::: not master1:...", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites ssn", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "ssn", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "secondary site: node is standby", + "next": "step30", + "loop": 120, + "wait": 2, + "post": "osn", + "pSite": "pSiteUp", + "sSite": "sSiteDown", + "pHost": "pHostUp", + "sHost": "sHostDown" + }, + { + "step": "step30", + "name": "secondary site: node back online", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": "pSiteUp", + "sSite": [ + "lpt=10", + "lss=1", + "srr=S", + "srHook=SWAIT", + "srPoll=SFAIL" + ], + "pHost": "pHostUp", + "sHost": [ + "clone_state=DEMOTED" , + "roles=master1::worker:" , + "score=(-INFINITY|0)" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "step110", + "loop": 120, + "wait": 2, + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step110", + "name": "test prerequitsites spn", + "next": "step120", + "loop": 1, + "wait": 1, + "post": "spn", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step120", + "name": "primary site: node is standby", + "next": "step130", + "loop": 120, + "wait": 2, + "pSite": "pSiteDown", + "sSite": [ + "lpt=(30|1[6-9]........)", + "lss=4", + "srr=S", + "srHook=(PRIM|SOK)", + "srPoll=SOK" + ], + "pHost": "pHostDown", + "sHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=(100|145)" + ] + }, + { + "step": "step130", + "name": "takeover on secondary", + "next": "step140", + "loop": 120, + "post": "opn", + "wait": 2, + "pSite": [ + "lss=1" , + "srr=P" , + "lpt=10" , + "srHook=SWAIT" , + "srPoll=SFAIL" + ], + "sSite": "pSiteUp", + "pHost": [ + "clone_state=UNDEFINED" , + "roles=master1::worker:" , + "score=150" , + "standby=on" + ], + "sHost": "pHostUp" + }, + { + "step": "step140", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/spn.json b/test/json/classic-ScaleOut/angi-ScaleOut/spn.json new file mode 100644 index 00000000..a5ed1730 --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/spn.json @@ -0,0 +1,97 @@ +{ + "test": "spn", + "name": "standby secondary node (and online again)", + "start": "step10", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "spn", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "node is standby", + "next": "step30", + "loop": 120, + "wait": 2, + "pSite": [ + "lss=1" , + "srr=P" , + "lpt=1[6-9]........" 
, + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=(30|1[6-9]........)", + "lss=4", + "srr=S", + "srHook=(PRIM|SOK)", + "srPoll=SOK" + ], + "pHost": [ + "clone_state=UNDEFINED" , + "roles=master1::worker:" , + "score=150" , + "standby=on" + ], + "sHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=(100|145)" + ] + }, + { + "step": "step30", + "name": "takeover on secondary", + "next": "step40", + "loop": 120, + "post": "opn", + "wait": 2, + "pSite": [ + "lss=1" , + "srr=P" , + "lpt=10" , + "srHook=SWAIT" , + "srPoll=SFAIL" + ], + "sSite": [ + "lpt=1[6-9]........", + "lss=4", + "srr=P", + "srHook=PRIM", + "srPoll=PRIM" + ], + "pHost": [ + "clone_state=UNDEFINED" , + "roles=master1::worker:" , + "score=150" , + "standby=on" + ], + "sHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=150" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "todo": "allow pointer to step10", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp" + } + ] +} diff --git a/test/json/classic-ScaleOut/angi-ScaleOut/ssn.json b/test/json/classic-ScaleOut/angi-ScaleOut/ssn.json new file mode 100644 index 00000000..137f7176 --- /dev/null +++ b/test/json/classic-ScaleOut/angi-ScaleOut/ssn.json @@ -0,0 +1,98 @@ +{ + "test": "ssn", + "name": "standby secondary node (and online again)", + "start": "step10", + "sid": "HA1", + "mstResource": "ms_SAPHanaCon_HA1_HDB00", + "steps": [ + { + "step": "step10", + "name": "test prerequitsites", + "next": "step20", + "loop": 1, + "wait": 1, + "post": "ssn", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + }, + { + "step": "step20", + "name": "node is standby", + "next": "step30", + "loop": 120, + "wait": 2, + "post": "osn", + "pSite": [ + "lss=4" , + "srr=P" , + "lpt=1[6-9]........" , + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=10", + "lss=1", + "srr=S", + "srHook=SFAIL", + "srPoll=SFAIL" + ], + "pHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=150" + ], + "sHost": [ + "clone_state=UNDEFINED" , + "roles=master1::worker:" , + "score=100" , + "standby=on" + ] + }, + { + "step": "step30", + "name": "node back online", + "next": "step40", + "loop": 120, + "wait": 2, + "todo": "pHost+sHost to check site-name", + "pSite": [ + "lss=4" , + "srr=P" , + "lpt=1[6-9]........" 
, + "srHook=PRIM" , + "srPoll=PRIM" + ], + "sSite": [ + "lpt=10", + "lss=1", + "srr=S", + "srHook=SWAIT", + "srPoll=SFAIL" + ], + "pHost": [ + "clone_state=PROMOTED" , + "roles=master1:master:worker:master" , + "score=150" + ], + "sHost": [ + "clone_state=DEMOTED" , + "roles=master1::worker:" , + "score=(-INFINITY|0)" + ] + }, + { + "step": "step40", + "name": "end recover", + "next": "END", + "loop": 120, + "wait": 2, + "post": "cleanup", + "pSite": "pSiteUp", + "sSite": "sSiteUp", + "pHost": "pHostUp", + "sHost": "sHostUp" + } + ] +} From f434e2767ef5249828c3c81fe450d54cca4495b3 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Thu, 31 Aug 2023 16:04:14 +0200 Subject: [PATCH 32/48] angi: tester: SAPHanaSR-testCluster - improved end message --- test/SAPHanaSR-testCluster | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/SAPHanaSR-testCluster b/test/SAPHanaSR-testCluster index 33ac5465..41b69de5 100755 --- a/test/SAPHanaSR-testCluster +++ b/test/SAPHanaSR-testCluster @@ -120,9 +120,9 @@ while test01.run['count'] <= test01.config['repeat']: test01.run['test_rc'] = test01.process_test() MSG_TEMPL = "TEST: {} testNr={} {} successfully :) ######" if test01.run['test_rc'] == 0: - test01.message(MSG_TEMPL.format(my_test_id, 'PASSED', test01.run['count'])) + test01.message(MSG_TEMPL.format(my_test_id, test01.run['count'], 'PASSED')) else: - test01.message(MSG_TEMPL.format(my_test_id, 'FAILED', test01.run['count'])) + test01.message(MSG_TEMPL.format(my_test_id, test01.run['count'], 'FAILED')) test01.run['count'] += 1 if test01.run['log_file_handle']: test01.run['log_file_handle'].close() From 6cf5799462ce4e6b7309ebcb10d0183b61dd8687 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Fri, 1 Sep 2023 13:27:19 +0200 Subject: [PATCH 33/48] angi: tester: kill_prim_indexserver.json - tuned test expressions --- test/json/angi-ScaleUp/kill_prim_indexserver.json | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/json/angi-ScaleUp/kill_prim_indexserver.json b/test/json/angi-ScaleUp/kill_prim_indexserver.json index ffdb46d9..855efb86 100644 --- a/test/json/angi-ScaleUp/kill_prim_indexserver.json +++ b/test/json/angi-ScaleUp/kill_prim_indexserver.json @@ -29,11 +29,10 @@ "srPoll=PRIM" ], "sSite": [ - "lpt=(1[6-9]........|30)", + "lpt=(1[6-9]........|30|10)", "lss=4", "srr=S", - "srHook=(PRIM|SOK)", - "srPoll=SOK" + "srHook=(PRIM|SOK)" ], "pHost": [ "clone_state=(PROMOTED|DEMOTED|UNDEFINED)" , From 7a161f51a86ffe66cd78121b3aa460820a0d189c Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Fri, 1 Sep 2023 14:14:46 +0200 Subject: [PATCH 34/48] angi: tester: loopTests - removed - use only loopTests-multiNode from now on --- test/loopTests | 71 -------------------------------------------------- 1 file changed, 71 deletions(-) delete mode 100755 test/loopTests diff --git a/test/loopTests b/test/loopTests deleted file mode 100755 index 55940274..00000000 --- a/test/loopTests +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/bash - -function run() { - local node="$1" testID="$2" test_prop="$3" repeat="$4" sleep="$5" - logFile="testLog$(date +"%Y-%m-%d").txt" - logLink="testLog.curr.txt" - if [ ! 
-f "$logFile" ]; then - touch "$logFile" - fi - if [ -L "$logLink" ]; then - rl=$(readlink "$logLink") - if [[ "$rl" != "$logFile" ]]; then - ln -s -f "$logFile" "$logLink" - fi - else - ln -s "$logFile" "$logLink" - fi - SAPHanaSR-testCluster --testFile="$test_dir/${testID}.json" \ - --remoteNode="$node" \ - --repeat="$repeat" \ - --dumpFailures \ - --defaultChecksFile="$test_dir/defaultChecks.json" \ - --properties="$test_dir/$test_prop.json" \ - --logFile "$local_dir/$logFile" - # ln -s -f testLog2023-03-31.txt testLog.curr.txt - sleep "$sleep"; - return 0 -} - -test_scenario="angi-ScaleUp" -test_case="nop" -test_prop="properties" -local_dir="$PWD" -node="nowhere" - -while [ $# -gt 0 ]; do - case "$1" in - --test_scenario=* ) - test_scenario=${1#*=} - ;; - --node=* ) - node=${1#*=} - ;; - --properties=* ) - test_prop=${1#*=} - ;; - esac - shift -done - -test_dir="/usr/share/SAPHanaSR-tester/json/$test_scenario" - -echo "node: $node, test_scenario=$test_scenario" -while true; do - run "$node" restart_cluster_turn_hana "$test_prop" 1 300 - run "$node" kill_prim_inst "$test_prop" 3 300 - run "$node" free_log_area "$test_prop" 1 60 - run "$node" kill_prim_inst "$test_prop" 1 120 - run "$node" free_log_area "$test_prop" 1 60 - run "$node" ssn "$test_prop" 2 300 - run "$node" kill_secn_inst "$test_prop" 3 300 - run "$node" spn "$test_prop" 2 300 - run "$node" sap "$test_prop" 1 300 - run "$node" kill_prim_indexserver "$test_prop" 1 300 - run "$node" nop "$test_prop" 1 10 - run "$node" kill_secn_indexserver "$test_prop" 1 300 - run "$node" maintenance_cluster_turn_hana "$test_prop" 1 300 - run "$node" restart_cluster "$test_prop" 1 300 - run "$node" restart_cluster_hana_running "$test_prop" 1 300 - run "$node" bmt "$test_prop" 1 60 -done From b056adf85f1976f96ce00e3b70da5387b32faaba Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Fri, 1 Sep 2023 14:19:59 +0200 Subject: [PATCH 35/48] loopTests03 - renamed to loopTests03-multiNode and adapted for multi node calls --- test/{loopTests03 => loopTests03-multiNode} | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) rename test/{loopTests03 => loopTests03-multiNode} (74%) diff --git a/test/loopTests03 b/test/loopTests03-multiNode similarity index 74% rename from test/loopTests03 rename to test/loopTests03-multiNode index 9b74ef35..7ff4c6b5 100755 --- a/test/loopTests03 +++ b/test/loopTests03-multiNode @@ -1,7 +1,7 @@ #!/usr/bin/bash function run() { - local node="$1" testID="$2" test_prop="$3" repeat="$4" sleep="$5" + local testID="$1" test_prop="$2" repeat="$3" sleep="$4" logFile="testLog$(date +"%Y-%m-%d").txt" logLink="testLog.curr.txt" if [ ! 
-f "$logFile" ]; then @@ -15,12 +15,12 @@ function run() { else ln -s "$logFile" "$logLink" fi - SAPHanaSR-testCluster --testFile="$test_dir/${testID}.json" \ - --remoteNode="$node" \ + SAPHanaSR-testCluster-multiNode --testFile="$test_dir/${testID}.json" \ --repeat="$repeat" \ --dumpFailures \ --defaultChecksFile="$test_dir/defaultChecks.json" \ --properties="$test_dir/$test_prop.json" \ + --remoteNodes $nodes \ --logFile "$local_dir/$logFile" # ln -s -f testLog2023-03-31.txt testLog.curr.txt sleep "$sleep"; @@ -31,7 +31,7 @@ test_scenario="angi-ScaleUp" test_case="nop" test_prop="properties" local_dir="$PWD" -node="nowhere" +nodes="" while [ $# -gt 0 ]; do case "$1" in @@ -39,7 +39,7 @@ while [ $# -gt 0 ]; do test_scenario=${1#*=} ;; --node=* ) - node=${1#*=} + nodes="$nodes ${1#*=}" ;; --properties=* ) test_prop=${1#*=} @@ -50,9 +50,9 @@ done test_dir="/usr/share/SAPHanaSR-tester/json/$test_scenario" -echo "node: $node, test_scenario=$test_scenario" +echo "nodes: $nodes, test_scenario=$test_scenario" while true; do - run "$node" free_log_area "$test_prop" 1 60 - run "$node" kill_prim_inst "$test_prop" 1 60 - run "$node" bmt "$test_prop" 1 60 + run free_log_area "$test_prop" 1 60 + run kill_prim_inst "$test_prop" 1 60 + run bmt "$test_prop" 1 60 done From cf99f96fe3ec6656471844152d70c26a4d1e6ee3 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Fri, 1 Sep 2023 14:24:00 +0200 Subject: [PATCH 36/48] loopTests02 - renamed to loopTests02-multiNode and adapted for multi node calls --- test/{loopTests02 => loopTests02-multiNode} | 44 ++++++++++----------- 1 file changed, 22 insertions(+), 22 deletions(-) rename test/{loopTests02 => loopTests02-multiNode} (52%) diff --git a/test/loopTests02 b/test/loopTests02-multiNode similarity index 52% rename from test/loopTests02 rename to test/loopTests02-multiNode index 158eb8e3..09a7dc21 100755 --- a/test/loopTests02 +++ b/test/loopTests02-multiNode @@ -1,7 +1,7 @@ #!/usr/bin/bash function run() { - local node="$1" testID="$2" test_prop="$3" repeat="$4" sleep="$5" + local testID="$1" test_prop="$2" repeat="$3" sleep="$4" logFile="testLog$(date +"%Y-%m-%d").txt" logLink="testLog.curr.txt" if [ ! 
-f "$logFile" ]; then @@ -15,12 +15,12 @@ function run() { else ln -s "$logFile" "$logLink" fi - SAPHanaSR-testCluster --testFile="$test_dir/${testID}.json" \ - --remoteNode="$node" \ + SAPHanaSR-testCluster-multiNode --testFile="$test_dir/${testID}.json" \ --repeat="$repeat" \ --dumpFailures \ --defaultChecksFile="$test_dir/defaultChecks.json" \ --properties="$test_dir/$test_prop.json" \ + --remoteNodes $nodes \ --logFile "$local_dir/$logFile" # ln -s -f testLog2023-03-31.txt testLog.curr.txt sleep "$sleep"; @@ -31,7 +31,7 @@ test_scenario="angi-ScaleUp" test_case="nop" test_prop="properties" local_dir="$PWD" -node="nowhere" +nodes="" while [ $# -gt 0 ]; do case "$1" in @@ -39,7 +39,7 @@ while [ $# -gt 0 ]; do test_scenario=${1#*=} ;; --node=* ) - node=${1#*=} + nodes="$nodes ${1#*=}" ;; --properties=* ) test_prop=${1#*=} @@ -50,22 +50,22 @@ done test_dir="/usr/share/SAPHanaSR-tester/json/$test_scenario" -echo "node: $node, test_scenario=$test_scenario" +echo "nodes: $nodes, test_scenario=$test_scenario" while true; do - run "$node" kill_prim_inst "$test_prop" 3 600 - run "$node" free_log_area "$test_prop" 1 60 - run "$node" kill_prim_inst "$test_prop" 1 120 - run "$node" free_log_area "$test_prop" 1 60 - run "$node" restart_cluster_turn_hana "$test_prop" 1 300 - run "$node" ssn "$test_prop" 2 300 - run "$node" kill_secn_inst "$test_prop" 3 300 - run "$node" spn "$test_prop" 2 300 - run "$node" sap "$test_prop" 1 300 - run "$node" kill_prim_indexserver "$test_prop" 1 300 - run "$node" nop "$test_prop" 1 10 - run "$node" kill_secn_indexserver "$test_prop" 1 300 - run "$node" maintenance_cluster_turn_hana "$test_prop" 1 300 - run "$node" restart_cluster "$test_prop" 1 300 - run "$node" restart_cluster_hana_running "$test_prop" 1 300 - run "$node" bmt "$test_prop" 1 60 + run kill_prim_inst "$test_prop" 3 600 + run free_log_area "$test_prop" 1 60 + run kill_prim_inst "$test_prop" 1 120 + run free_log_area "$test_prop" 1 60 + run restart_cluster_turn_hana "$test_prop" 1 300 + run ssn "$test_prop" 2 300 + run kill_secn_inst "$test_prop" 3 300 + run spn "$test_prop" 2 300 + run sap "$test_prop" 1 300 + run kill_prim_indexserver "$test_prop" 1 300 + run nop "$test_prop" 1 10 + run kill_secn_indexserver "$test_prop" 1 300 + run maintenance_cluster_turn_hana "$test_prop" 1 300 + run restart_cluster "$test_prop" 1 300 + run restart_cluster_hana_running "$test_prop" 1 300 + run bmt "$test_prop" 1 60 done From bb87d782e8d24183315b273eb2695503d582d98e Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Fri, 1 Sep 2023 14:26:53 +0200 Subject: [PATCH 37/48] angi: tester: callTest - removed - use only callTest-multiNode from now on --- test/callTest | 65 --------------------------------------------------- 1 file changed, 65 deletions(-) delete mode 100755 test/callTest diff --git a/test/callTest b/test/callTest deleted file mode 100755 index 568b2cba..00000000 --- a/test/callTest +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/bash - -function run() { - local node="$1" testID="$2" test_prop="$3" repeat="$4" sleep="$5" - logFile="testLog$(date +"%Y-%m-%d").txt" - logLink="testLog.curr.txt" - if [ ! 
-f "$logFile" ]; then - touch "$logFile" - fi - if [ -L "$logLink" ]; then - rl=$(readlink "$logLink") - if [[ "$rl" != "$logFile" ]]; then - ln -s -f "$logFile" "$logLink" - fi - else - ln -s "$logFile" "$logLink" - fi - SAPHanaSR-testCluster --testFile="$test_dir/${testID}.json" \ - --remoteNode="$node" \ - --repeat="$repeat" \ - --dumpFailures \ - --defaultChecksFile="$test_dir/defaultChecks.json" \ - --properties="$test_dir/$test_prop.json" \ - --logFile "$local_dir/$logFile" - # ln -s -f testLog2023-03-31.txt testLog.curr.txt - sleep "$sleep"; - return 0 -} - -function usage() { - echo "usage: $0 [--test_scenario=...] --node=... [--test_case=...] [--properties=...] | --help" -} - -test_scenario="angi-ScaleUp" -test_case="nop" -test_prop="properties" -local_dir="$PWD" -node="nowhere" - -while [ $# -gt 0 ]; do - case "$1" in - --test_scenario=* ) - test_scenario=${1#*=} - ;; - --node=* ) - node=${1#*=} - ;; - --test_case=* ) - test_case=${1#*=} - ;; - --properties=* ) - test_prop=${1#*=} - ;; - --help* ) - usage - exit 2 - ;; - esac - shift -done - -test_dir="/usr/share/SAPHanaSR-tester/json/$test_scenario" - -echo "node: $node, test_scenario=$test_scenario, test_case=$test_case" -run "$node" "$test_case" "$test_prop" 1 10 From 71a8cf9d587e83d3bde9f3d84cca74bf2f5c6e05 Mon Sep 17 00:00:00 2001 From: lpinne Date: Mon, 18 Sep 2023 14:15:41 +0200 Subject: [PATCH 38/48] SAPHanaSR.7 SAPHanaSR-ScaleOut.7: NSE supported --- man/SAPHanaSR-ScaleOut.7 | 9 ++++++++- man/SAPHanaSR.7 | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/man/SAPHanaSR-ScaleOut.7 b/man/SAPHanaSR-ScaleOut.7 index 063b7e74..9a6b8cef 100644 --- a/man/SAPHanaSR-ScaleOut.7 +++ b/man/SAPHanaSR-ScaleOut.7 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH SAPHanaSR-ScaleOut 7 "09 May 2023" "" "SAPHanaSR-angi" +.TH SAPHanaSR-ScaleOut 7 "18 Sep 2023" "" "SAPHanaSR-angi" .\" .SH NAME SAPHanaSR-ScaleOut \- Tools for automating SAP HANA system replication in @@ -317,6 +317,13 @@ memory can be used, as long as they are transparent to SUSE HA. .PP 24. The SAPHanaController RA, the SUSE HA cluster and several SAP components need read/write access and sufficient space in the Linux /tmp filesystem. +.PP +25. SAP HANA Native Storage Extension (NSE) is supported in Scale-Up and +Scale-Out. Important is that this feature does not change the HANA topology or +interfaces. +In opposite to Native Storage Extension, the HANA Extension Nodes are changing +the topology and thus currently are not supported. +Please refer to SAP documentation for details. .PP .\" .SH BUGS diff --git a/man/SAPHanaSR.7 b/man/SAPHanaSR.7 index ed4f95dc..4f8b695a 100644 --- a/man/SAPHanaSR.7 +++ b/man/SAPHanaSR.7 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH SAPHanaSR 7 "08 May 2023" "" "SAPHanaSR-angi" +.TH SAPHanaSR 7 "18 Sep 2023" "" "SAPHanaSR-angi" .\" .SH NAME SAPHanaSR \- Tools for automating SAP HANA system replication in scale-up setups. @@ -284,6 +284,13 @@ character or number. Subsequent characters may contain dash and underscore. 23. The SAPHanaController RA, the SUSE HA cluster and several SAP components need read/write access and sufficient space in the Linux /tmp filesystem. .PP +24. SAP HANA Native Storage Extension (NSE) is supported in Scale-Up and +Scale-Out. Important is that this feature does not change the HANA topology or +interfaces. +In opposite to Native Storage Extension, the HANA Extension Nodes are changing +the topology and thus currently are not supported. +Please refer to SAP documentation for details. 
From efc1016d034a7b44494414be78e3bd688782b4fa Mon Sep 17 00:00:00 2001
From: lpinne
Date: Tue, 19 Sep 2023 11:33:52 +0200
Subject: [PATCH 39/48] SAPHanaSR.7 SAPHanaSR-ScaleOut.7: aligned with classic

---
 man/SAPHanaSR-ScaleOut.7 | 10 ++++++----
 man/SAPHanaSR.7          |  5 ++---
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/man/SAPHanaSR-ScaleOut.7 b/man/SAPHanaSR-ScaleOut.7
index 9a6b8cef..95173a15 100644
--- a/man/SAPHanaSR-ScaleOut.7
+++ b/man/SAPHanaSR-ScaleOut.7
@@ -315,12 +315,14 @@ to return in time.
 23. The SAP HANA Fast Restart feature on RAM-tmpfs as well as HANA on persistent
 memory can be used, as long as they are transparent to SUSE HA.
 .PP
-24. The SAPHanaController RA, the SUSE HA cluster and several SAP components
+24. The SAP HANA site name is from 2 up to 32 characters long. It starts with a
+character or number. Subsequent characters may contain dash and underscore.
+.PP
+25. The SAPHanaController RA, the SUSE HA cluster and several SAP components
 need read/write access and sufficient space in the Linux /tmp filesystem.
 .PP
-25. SAP HANA Native Storage Extension (NSE) is supported in Scale-Up and
-Scale-Out. It is important that this feature does not change the HANA topology
-or interfaces.
+26. SAP HANA Native Storage Extension (NSE) is supported.
+It is important that this feature does not change the HANA topology or interfaces.
 In contrast to Native Storage Extension, HANA Extension Nodes change the
 topology and thus are currently not supported.
 Please refer to SAP documentation for details.
diff --git a/man/SAPHanaSR.7 b/man/SAPHanaSR.7
index 4f8b695a..7bb0255a 100644
--- a/man/SAPHanaSR.7
+++ b/man/SAPHanaSR.7
@@ -284,9 +284,8 @@ character or number. Subsequent characters may contain dash and underscore.
 23. The SAPHanaController RA, the SUSE HA cluster and several SAP components
 need read/write access and sufficient space in the Linux /tmp filesystem.
 .PP
-24. SAP HANA Native Storage Extension (NSE) is supported in Scale-Up and
-Scale-Out. It is important that this feature does not change the HANA topology
-or interfaces.
+24. SAP HANA Native Storage Extension (NSE) is supported.
+It is important that this feature does not change the HANA topology or interfaces.
 In contrast to Native Storage Extension, HANA Extension Nodes change the
 topology and thus are currently not supported.
 Please refer to SAP documentation for details.
From c8e9ba2d5fd60b0efae81d4774c4f02e32534b4c Mon Sep 17 00:00:00 2001
From: lpinne
Date: Mon, 25 Sep 2023 17:29:35 +0200
Subject: [PATCH 40/48] SAPHanaSR_maintenance_examples.7 SAPHanaSR-ScaleOut_basic_cluster.7: whith -> with

---
 man/SAPHanaSR-ScaleOut_basic_cluster.7 | 2 +-
 man/SAPHanaSR_maintenance_examples.7   | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/man/SAPHanaSR-ScaleOut_basic_cluster.7 b/man/SAPHanaSR-ScaleOut_basic_cluster.7
index 21d1933d..6d18372e 100644
--- a/man/SAPHanaSR-ScaleOut_basic_cluster.7
+++ b/man/SAPHanaSR-ScaleOut_basic_cluster.7
@@ -32,7 +32,7 @@ If the cluster uses disk-less SBD, the no-quorum-policy 'suicide' is required.
 The crm basic parameter default-resource-stickiness defines the 'stickiness'
 score a resource gets on the node where it is currently running. This prevents
-the cluster from moving resources around whithout an urgent need during a
+the cluster from moving resources around without an urgent need during a
 cluster transition. The correct value depends on number of resources,
 colocation rules and resource groups.
 Particularly additional groups colocated to the HANA primary master resource
 can affect cluster decisions.
diff --git a/man/SAPHanaSR_maintenance_examples.7 b/man/SAPHanaSR_maintenance_examples.7
index 243dabac..1c890c95 100644
--- a/man/SAPHanaSR_maintenance_examples.7
+++ b/man/SAPHanaSR_maintenance_examples.7
@@ -384,7 +384,7 @@ This procedure can be used to update RAs, HANA HADR provider hook scripts and re
 \fB*\fR Remove left-over maintenance attribute from overall Linux cluster.
 .PP
 This could be done to avoid confusion caused by different maintenance procedures.
-See above overview on maintenance procedures whith running Linux cluster.
+See above overview on maintenance procedures with running Linux cluster.
 Before doing so, check for cluster attribute maintenance-mode="false".
 .PP
 .RS 4
@@ -400,7 +400,7 @@ Before doing so, check for cluster attribute maintenance-mode="false".
 \fB*\fR Remove left-over standby attribute from Linux cluster nodes.
 .PP
 This could be done to avoid confusion caused by different maintenance procedures.
-See above overview on maintenance procedures whith running Linux cluster.
+See above overview on maintenance procedures with running Linux cluster.
 Before doing so for all nodes, check for node attribute standby="off" on all nodes.
 .PP
 .RS 4
@@ -416,7 +416,7 @@ Before doing so for all nodes, check for node attribute standby="off" on all nod
 \fB*\fR Remove left-over maintenance attribute from resource.
 .PP
 This should usually not be needed.
-See above overview on maintenance procedures whith running Linux cluster.
+See above overview on maintenance procedures with running Linux cluster.
 .PP
 .RS 4
 # SAPHanaSR-showAttr
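As a side note on the default-resource-stickiness paragraph touched by
patch 40: with crmsh the parameter is usually set as a resource default, for
example (the value 1000 is only an illustration; the right value depends on
the setup, as the manual page explains):

    # crm configure rsc_defaults resource-stickiness=1000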
From 5e856ab8163b497b3335f02c517acb54d0edeb1b Mon Sep 17 00:00:00 2001
From: lpinne
Date: Wed, 27 Sep 2023 14:44:39 +0200
Subject: [PATCH 41/48] ocf_suse_SAPHanaFilesystem.7: fixed text

---
 man/ocf_suse_SAPHanaFilesystem.7 | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/man/ocf_suse_SAPHanaFilesystem.7 b/man/ocf_suse_SAPHanaFilesystem.7
index 61f40bc0..066fe87c 100644
--- a/man/ocf_suse_SAPHanaFilesystem.7
+++ b/man/ocf_suse_SAPHanaFilesystem.7
@@ -223,9 +223,6 @@ clone cln_SAPHanaFil_SLE_HDB00 rsc_SAPHanaFil_SLE_HDB00 \\
 meta clone-node-max="1" notify="true" interleave="true"
 .RE
 .PP
-* Example configuration for a SAPHanaFilesystem resource on HANA scale-out.
-.PP
-The HANA consists of two sites with several nodes each. An additional cluster node
 * Example configuration for a SAPHanaFilesystem resource for HANA scale-out.
 .PP
 The HANA consists of two sites with several nodes each. An additional cluster node
From 3f2ecb0583e79818049c94bb0c260a5442c0a773 Mon Sep 17 00:00:00 2001
From: lpinne
Date: Wed, 4 Oct 2023 11:47:26 +0200
Subject: [PATCH 42/48] SAPHanaSR-showAttr.8 ocf_suse_SAPHanaTopology.7 ocf_suse_SAPHanaController.7 ocf_suse_SAPHana.7: rc details

---
 man/SAPHanaSR-showAttr.8         | 8 +++++---
 man/ocf_suse_SAPHana.7           | 4 ++--
 man/ocf_suse_SAPHanaController.7 | 4 ++--
 man/ocf_suse_SAPHanaTopology.7   | 5 ++---
 4 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/man/SAPHanaSR-showAttr.8 b/man/SAPHanaSR-showAttr.8
index b5c0b315..5ad551e1 100644
--- a/man/SAPHanaSR-showAttr.8
+++ b/man/SAPHanaSR-showAttr.8
@@ -1,6 +1,6 @@
 .\" Version: 1.001
 .\"
-.TH SAPHanaSR-showAttr 8 "08 May 2023" "" "SAPHanaSR"
+.TH SAPHanaSR-showAttr 8 "04 Oct 2023" "" "SAPHanaSR"
 .\"
 .SH NAME
 SAPHanaSR-showAttr \- Shows Linux cluster attributes for SAP HANA system replication.
@@ -267,12 +267,14 @@ Value: [ 4 | 3 | 2 | 1 | 0 ]
 This field contains the return code of landscapHostConfiguration.py.
 The parameter does not tell you if the secondary system is ready for a takeover.
 The meaning is different from common Linux return codes.
+The SAPHanaController and SAPHanaTopology RAs will interpret return code 1 as
+NOT-RUNNING (or ERROR) and return codes 2+3+4 as RUNNING.
 .br
 4 = OK - Everything looks perfect on the HANA primary.
 .br
-3 = WARNING - A HANA Host Auto-Failover is taking place.
+3 = WARNING - An internal HANA action is ongoing, e.g. host auto-failover.
 .br
-2 = INFO - The landscape is completely functional, but the actual role of the host differs from the configured role.
+2 = INFO - The landscape is completely functional, but the actual host role differs from the configured role.
 .br
 1 = DOWN - There are not enough active hosts.
 .br
diff --git a/man/ocf_suse_SAPHana.7 b/man/ocf_suse_SAPHana.7
index dbb55af4..e960b455 100644
--- a/man/ocf_suse_SAPHana.7
+++ b/man/ocf_suse_SAPHana.7
@@ -1,6 +1,6 @@
 .\" Version: 0.160.1
 .\"
-.TH ocf_suse_SAPHana 7 "27 Jun 2022" "" "OCF resource agents"
+.TH ocf_suse_SAPHana 7 "03 Oct 2023" "" "OCF resource agents"
 .\"
 .SH NAME
 SAPHana \- Manages takeover between two SAP HANA databases with system replication.
@@ -46,7 +46,7 @@ landscapeHostConfiguration.py has some detailed output about HANA system status
 and node roles. For our monitor the overall status is relevant. This overall
 status is reported by the return code of the script:
 0: Internal Fatal, 1: ERROR, 2: WARNING, 3: INFO, 4: OK
-The SAPHana resource agent will interpret return code 0 as FATAL, 1 as not-running
+The SAPHana resource agent will interpret return code 0 as FATAL, 1 as NOT-RUNNING
 (or ERROR) and return codes 2+3+4 as RUNNING.
 .PP
 3. \fBhdbnsutil\fR
diff --git a/man/ocf_suse_SAPHanaController.7 b/man/ocf_suse_SAPHanaController.7
index ec8f0e4f..08f55656 100644
--- a/man/ocf_suse_SAPHanaController.7
+++ b/man/ocf_suse_SAPHanaController.7
@@ -1,6 +1,6 @@
 .\" Version: 1.001
 .\"
-.TH ocf_suse_SAPHanaController 7 "09 Aug 2022" "" "OCF resource agents"
+.TH ocf_suse_SAPHanaController 7 "04 Oct 2023" "" "OCF resource agents"
 .\"
 .SH NAME
 SAPHanaController \- Manages takeover between two SAP HANA databases with system replication.
@@ -50,7 +50,7 @@ landscapeHostConfiguration.py has some detailed output about HANA system status
 and node roles. For our monitor the overall status is relevant. This overall
 status is reported by the return code of the script:
 0: Internal Fatal, 1: ERROR, 2: WARNING, 3: INFO, 4: OK
-The SAPHanaController resource agent will interpret return code 0 as FATAL, 1 as not-running
+The SAPHanaController resource agent will interpret return code 0 as FATAL, 1 as NOT-RUNNING
 (or ERROR) and return codes 2+3+4 as RUNNING.
 .br
 Note: Some conditions cause HANA stopping to work, but not reporting an error. E.g. filesystem filled up.
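All four manual pages touched by patch 42 describe the same return-code
convention. As a schematic illustration only — not the RA code — the mapping
could be probed like this, assuming SID HA1 and the usual <sid>adm
environment:

    su - ha1adm -c "HDBSettings.sh landscapeHostConfiguration.py"; rc=$?
    case "$rc" in
        0) echo "FATAL" ;;
        1) echo "NOT-RUNNING (or ERROR)" ;;
        2|3|4) echo "RUNNING" ;;
    esac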
diff --git a/man/ocf_suse_SAPHanaTopology.7 b/man/ocf_suse_SAPHanaTopology.7
index 5bd3688a..8a64b821 100644
--- a/man/ocf_suse_SAPHanaTopology.7
+++ b/man/ocf_suse_SAPHanaTopology.7
@@ -23,11 +23,10 @@ The resource agent uses the following interfaces provided by SAP:
 landscapeHostConfiguration.py has some detailed output about HANA system status
 and node roles. For our monitor the overall status is relevant.
 This overall status is reported by the return code of the script:
-0: Internal Fatal 1: ERROR 2: WARNING 3: INFO (maybe a switch of the resource
-running) 4: OK
+0: Internal Fatal, 1: ERROR, 2: WARNING, 3: INFO (e.g. host auto-failover happened), 4: OK
 .br
 The SAPHanaTopology resource agent will interpret return codes 1 as
-NOT-RUNNING (or 1 failure) and return codes 2+3+4 as RUNNING.
+NOT-RUNNING (or ERROR) and return codes 2+3+4 as RUNNING.
 SAPHanaTopology scans the output table of landscapeHostConfiguration.py to
 identify the roles of the cluster node. Roles means configured and current role
 of the nameserver as well as the indexserver.
From 5c844fc1e32802c15fbe49b7ffa710a19109c2ce Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Wed, 11 Oct 2023 11:44:17 +0200
Subject: [PATCH 43/48] susChkSrv.py: fixed description (docstring) and updated TODOs

---
 srHook/susChkSrv.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/srHook/susChkSrv.py b/srHook/susChkSrv.py
index 9bf9947b..c40da7ac 100755
--- a/srHook/susChkSrv.py
+++ b/srHook/susChkSrv.py
@@ -12,7 +12,7 @@
     provider = susChkSrv
     path = /usr/share/SAPHanaSR
     execution_order = 2
-    action_on_lost = kill | stop | ignore (fence and attr currently not implemented)
+    action_on_lost = kill | stop | ignore | fence (attr is currently not implemented)
     stop_timeout = 20
 # timeout = timeout-in-seconds (currently not implemented)
@@ -23,8 +23,8 @@
 selected by a parameter.
 TODO: The hook might not do it's action, if the SR is not-in-sync. Maybe to be
 selected by a parameter
-TODO: actions "fence", "attr" (attr is to inform the cluster (RA) to handle this SAP instance
-    as broken)
+TODO: action "attr" (attr is to inform the cluster (RA) to handle this SAP instance
+    as broken - maybe the project will not implement this as the other actions are already sufficient)
 TODO: action "kill". The hard-coded sleep 5 is to allow the nameserver to log events.
     To be checked, if 5s is a good sleep time. Maybe to be tuned by a parameter
 TODO: To be tested with "real" slow dying indexservers
From 04dbbff215c6c147ec14582b2eae983265c50658 Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Thu, 12 Oct 2023 16:43:42 +0200
Subject: [PATCH 44/48] susHanaSR.py: handle pending fallback file (see also bsc1215693 (was about SAPHanaSR-ScaleOut)

---
 srHook/susHanaSR.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/srHook/susHanaSR.py b/srHook/susHanaSR.py
index e4468d08..a11ed691 100755
--- a/srHook/susHanaSR.py
+++ b/srHook/susHanaSR.py
@@ -89,6 +89,8 @@ def srConnectionChanged(self, ParamDict, **kwargs):
             ret_code = os.system(my_cmd)
             my_msg = f"CALLING CRM: <{my_cmd}> ret_code={ret_code}"
             self.tracer.info(f"{self.__class__.__name__}.{method}() {my_msg}\n")
+            fallback_file_name = f"../.crm_attribute.{my_site}"
+            fallback_stage_file_name = f"../.crm_attribute.stage.{my_site}"
             if ret_code != 0:
                 #
                 # FALLBACK
@@ -104,16 +106,14 @@ def srConnectionChanged(self, ParamDict, **kwargs):
                 # however we go one level up (..) to have the file accessible for all
                 # SAP HANA swarm nodes
                 #
-                stage_file = f"../.crm_attribute.stage.{my_site}"
                 attribute_name = f"hana_{mysid_lower}_site_srHook_{my_site}"
-                with open(f"{stage_file}", "w", encoding="UTF-8") as fallback_file_obj:
+                with open(fallback_stage_file_name, "w", encoding="UTF-8") as fallback_file_obj:
                     fallback_file_obj.write(f"{attribute_name} = {my_srs}")
                 #
                 # release the stage file to the original name (move is used to be atomic)
                 # .crm_attribute.stage. is renamed to .crm_attribute.
                 #
-                os.rename(f"../.crm_attribute.stage.{my_site}",
-                          f"../.crm_attribute.{my_site}")
+                os.rename(fallback_stage_file_name, fallback_file_name)
                 return 0
         except NameError as e:
             print(f"Could not find base class ({e})")
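The fallback in patch 44 relies on the classic stage-then-rename pattern:
os.rename() within one filesystem is atomic on POSIX, so a reader never sees
a half-written attribute file. The same idea as a standalone sketch (names
and values are made up, not taken from the hook):

    import os

    def write_fallback(attribute_name, value, site):
        stage = f".crm_attribute.stage.{site}"
        final = f".crm_attribute.{site}"
        with open(stage, "w", encoding="UTF-8") as file_obj:
            file_obj.write(f"{attribute_name} = {value}")
        os.rename(stage, final)  # atomic replace on POSIX filesystems

    write_fallback("hana_ha1_site_srHook_SITE2", "SOK", "SITE2")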
From d4da289e709acd1b54ebeac91f4b6ef78e0be1f4 Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Tue, 17 Oct 2023 16:57:46 +0200
Subject: [PATCH 45/48] saphana-topology-lib - write virt host name attribute again

---
 ra/saphana-topology-lib | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/ra/saphana-topology-lib b/ra/saphana-topology-lib
index 3cd78188..05cb7b5b 100755
--- a/ra/saphana-topology-lib
+++ b/ra/saphana-topology-lib
@@ -328,8 +328,29 @@ function sht_monitor() {
     # called by: TODO
     super_ocf_log info "FLOW ${FUNCNAME[0]} ()"
     local rc=0
+    local vName="${NODENAME}"
     # TODO PRIO 1: 'node_role_walk' is only needed for scale-out; how to differ that here?
     node_role_walk --standbyFilter=off
+    # DONE: PRIO1: ASK: Is the output format of ListInstances fix? Could we take that as an API?
+    # try to catch:  Inst Info : LNX - 42 - lv9041 - 740, patch 36, changelist 1444691
+    # We rely on the following format: SID is word#4, NR is word#6, vHost is word#8
+    #### SAP-CALL
+    if [ -e /usr/sap/hostctrl/exe/saphostctrl ]; then
+        vName=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances \
+            | awk '$4 == SID && $6 == INO { print $8 }' SID=$SID INO=$InstanceNr 2>/dev/null )
+        super_ocf_log debug "DBG: ListInstances: $(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances)"
+    else
+        super_ocf_log error "ERR: SAPHOSTAGENT is not installed at /usr/sap/hostctrl/exe (saphostctrl missing)"
+    fi
+    if [ -n "$vName" ]; then
+        set_hana_attribute ${NODENAME} "$vName" "${ATTR_NAME_HANA_VHOST[@]}" "${NODENAME}"
+    else
+        vName=$(get_hana_attribute ${NODENAME} "${ATTR_NAME_HANA_VHOST[@]}" "${NODENAME}")
+    fi
+    # last fallback, if neither the HANA call NOR the Attribute "knows" the vName - try the local hostname
+    if [ -z "$vName" ]; then
+        vName=${NODENAME}
+    fi
     if [ -f "$HA_RSCTMP/SAPHana/SAPTopologyON.${SID}" ]; then
        rc="$OCF_SUCCESS"
     else
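For reference, the saphostctrl output parsed by the awk one-liner in patch 45
looks like the sample quoted in its comment (an actual system prints its own
SID, instance number and virtual host name):

    # /usr/sap/hostctrl/exe/saphostctrl -function ListInstances
     Inst Info : LNX - 42 - lv9041 - 740, patch 36, changelist 1444691

With SID=LNX and INO=42, field 4 carries the SID, field 6 the instance
number, and field 8 (lv9041) is the virtual host name that gets written to
the vhost attribute. Note that the field test must be a comparison; using
awk's built-in NR as a match variable would compare against the record
number instead, which is why the variable is named INO here.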
From 9ae6848a493a35dcb411fedf8f631ebc281e292e Mon Sep 17 00:00:00 2001
From: lpinne
Date: Tue, 17 Oct 2023 17:04:44 +0200
Subject: [PATCH 46/48] SAPHanaSR_basic_cluster.7: typo

---
 man/SAPHanaSR_basic_cluster.7 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/SAPHanaSR_basic_cluster.7 b/man/SAPHanaSR_basic_cluster.7
index d0404ad7..7f656e39 100644
--- a/man/SAPHanaSR_basic_cluster.7
+++ b/man/SAPHanaSR_basic_cluster.7
@@ -22,7 +22,7 @@ configurations might match specific needs.
 The crm basic parameter default-resource-stickiness defines the 'stickiness'
 score a resource gets on the node where it is currently running. This prevents
-the cluster from moving resources around whithout an urgent need during a
+the cluster from moving resources around without an urgent need during a
 cluster transition. The correct value depends on number of resources,
 colocation rules and resource groups.
From 9a41a8e3431c3c21db613af3f39a591557bae9d3 Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Tue, 17 Oct 2023 17:20:23 +0200
Subject: [PATCH 47/48] saphana-controller-lib - register to use virt host name attribute, if available

---
 ra/saphana-controller-lib | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/ra/saphana-controller-lib b/ra/saphana-controller-lib
index 33e127ab..8a06ca4a 100755
--- a/ra/saphana-controller-lib
+++ b/ra/saphana-controller-lib
@@ -592,6 +592,7 @@ function register_hana_secondary() {
     local rc=2;
     local remoteInstance="";
     local remoteHanaHost="";
+    local vHost=""
     # TODO PRIO1: NG - scale-up is normally always the_master_nameserver
     if is_the_master_nameserver; then
         remoteInstance="$InstanceNr"
@@ -609,13 +610,17 @@ function register_hana_secondary() {
         set_hana_attribute "$NODENAME" "$gSite" "${ATTR_NAME_HANA_SEC[@]}"
         remoteHanaHost=$(get_hana_site_attribute "$remSite" "${ATTR_NAME_HANA_SITE_MNS[@]}")
         # TODO PRIO2: NG - only start register, if all variables are set
-        super_ocf_log info "ACT: SAP HANA REGISTER: hdbnsutil -sr_register --remoteHost=$remoteHanaHost --remoteInstance=$remoteInstance --replicationMode=$hanaRM --operationMode=$hanaOM --name=$gSite"
+        vHost=$(get_hana_attribute "$remoteHanaHost" "${ATTR_NAME_HANA_VHOST[@]}")
+        if [ -z "$vHost" ]; then
+            vHost="$remoteHanaHost"
+        fi
+        super_ocf_log info "ACT: SAP HANA REGISTER: hdbnsutil -sr_register --remoteHost=$vHost --remoteInstance=$remoteInstance --replicationMode=$hanaRM --operationMode=$hanaOM --name=$gSite"
         #
         # set status "R" for SRACTION attribute to interact with srTkOver; SRACTION_HISTORY is kept till next monitor
         #
         set_hana_attribute "$NODENAME" "R" "${ATTR_NAME_HANA_SRACTION[@]}"
         set_hana_attribute "$NODENAME" "R" "${ATTR_NAME_HANA_SRACTION_HISTORY[@]}"
-        HANA_CALL --timeout inf --use-su --cmd "hdbnsutil -sr_register --remoteHost=$remoteHanaHost --remoteInstance=$remoteInstance --replicationMode=$hanaRM --operationMode=$hanaOM --name=$gSite"; rc=$?
+        HANA_CALL --timeout inf --use-su --cmd "hdbnsutil -sr_register --remoteHost=$vHost --remoteInstance=$remoteInstance --replicationMode=$hanaRM --operationMode=$hanaOM --name=$gSite"; rc=$?
         #
         # resset status "-" for SRACTION attribute to interact with srTkOver; SRACTION_HISTORY is kept till next monitor
         #
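The register command assembled in patch 47 ends up as a plain hdbnsutil
call; with made-up values for host, instance number, modes and site name it
would read:

    hdbnsutil -sr_register --remoteHost=suse01 --remoteInstance=00 \
        --replicationMode=sync --operationMode=logreplay --name=SITE2

The point of the change is that --remoteHost now prefers the virtual host
name stored by SAPHanaTopology (patch 45) and falls back to the master name
server attribute only if no vhost attribute is found.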
From c1f89d672dfe989505be10131c9c0964b7389eaa Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Tue, 17 Oct 2023 18:05:53 +0200
Subject: [PATCH 48/48] angi - version 1.2.2

---
 SAPHanaSR-angi.spec     | 2 +-
 ra/saphana-topology-lib | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/SAPHanaSR-angi.spec b/SAPHanaSR-angi.spec
index 09e92d51..3c6b7ec7 100644
--- a/SAPHanaSR-angi.spec
+++ b/SAPHanaSR-angi.spec
@@ -21,7 +21,7 @@
 License: GPL-2.0
 Group: Productivity/Clustering/HA
 AutoReqProv: on
 Summary: Resource agents to control the HANA database in system replication setup
-Version: 1.2.1
+Version: 1.2.2
 Release: 0
 Url: https://www.suse.com/c/fail-safe-operation-of-sap-hana-suse-extends-its-high-availability-solution/
diff --git a/ra/saphana-topology-lib b/ra/saphana-topology-lib
index 05cb7b5b..129814f9 100755
--- a/ra/saphana-topology-lib
+++ b/ra/saphana-topology-lib
@@ -337,15 +337,15 @@ function sht_monitor() {
     #### SAP-CALL
     if [ -e /usr/sap/hostctrl/exe/saphostctrl ]; then
         vName=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances \
-            | awk '$4 == SID && $6 == INO { print $8 }' SID=$SID INO=$InstanceNr 2>/dev/null )
+            | awk '$4 == SID && $6 == INO { print $8 }' SID="$SID" INO="$InstanceNr" 2>/dev/null )
         super_ocf_log debug "DBG: ListInstances: $(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances)"
     else
         super_ocf_log error "ERR: SAPHOSTAGENT is not installed at /usr/sap/hostctrl/exe (saphostctrl missing)"
     fi
     if [ -n "$vName" ]; then
-        set_hana_attribute ${NODENAME} "$vName" "${ATTR_NAME_HANA_VHOST[@]}" "${NODENAME}"
+        set_hana_attribute "${NODENAME}" "$vName" "${ATTR_NAME_HANA_VHOST[@]}" "${NODENAME}"
     else
-        vName=$(get_hana_attribute ${NODENAME} "${ATTR_NAME_HANA_VHOST[@]}" "${NODENAME}")
+        vName=$(get_hana_attribute "${NODENAME}" "${ATTR_NAME_HANA_VHOST[@]}" "${NODENAME}")
     fi
     # last fallback, if neither the HANA call NOR the Attribute "knows" the vName - try the local hostname
     if [ -z "$vName" ]; then
        vName=${NODENAME}