From ed4b362ed3d8da8abd3b4e84d254b0d709a189fb Mon Sep 17 00:00:00 2001 From: Jan Friesse Date: Mon, 25 Sep 2023 11:40:01 +0200 Subject: [PATCH 01/12] main: Fix exit code on grant/revoke command error Client command grant/revoke was returning success exit code on some failures (site not configured, arbitrator, no tickets given). Test case is running `booth grant -s $IP; echo $?`, where IP is not configured in the config file. Patch fixes this behavior so error code is returned. Signed-off-by: Jan Friesse --- src/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.c b/src/main.c index b4a174f4..6b531bfc 100644 --- a/src/main.c +++ b/src/main.c @@ -762,7 +762,7 @@ static int do_command(cmd_request_t cmd) else if (cmd == CMD_REVOKE) op_str = "revoke"; - rv = 0; + rv = -1; site = NULL; /* Always use TCP for client - at least for now. */ From d9b1f0dd43ad665ed96f9e269d1ee0b92a6e75e9 Mon Sep 17 00:00:00 2001 From: Jan Friesse Date: Mon, 25 Sep 2023 18:26:42 +0200 Subject: [PATCH 02/12] attr: Fix memory leak for list and get operation attr_get and attr_list were allocating a GString but they were calling g_string_free with second argument set to FALSE, which means GLib was returning character data to caller (see g_string_free for complete documentation) instead of freeing them. This return value was ignored by attr_* functions so memory leaked. Patch to fix this problem is simple - just set second argument of g_string_free to TRUE so GLib takes care of freeing all data.
Signed-off-by: Jan Friesse --- src/attr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/attr.c b/src/attr.c index 09c15bc1..ad411092 100644 --- a/src/attr.c +++ b/src/attr.c @@ -372,7 +372,7 @@ static cmd_result_t attr_get(struct ticket_config *tk, int fd, struct boothc_att if (send_header_plus(fd, &hdr, attr_val->str, attr_val->len)) rv = RLT_SYNC_FAIL; if (attr_val) - g_string_free(attr_val, FALSE); + g_string_free(attr_val, TRUE); return rv; } @@ -398,7 +398,7 @@ static cmd_result_t attr_list(struct ticket_config *tk, int fd, struct boothc_at rv = send_header_plus(fd, &hdr, data->str, data->len); if (data) - g_string_free(data, FALSE); + g_string_free(data, TRUE); return rv; } From 24eb02003fee0064e20214a92477aad4cebbdc65 Mon Sep 17 00:00:00 2001 From: Jan Friesse Date: Thu, 5 Oct 2023 09:33:12 +0200 Subject: [PATCH 03/12] attr: Fix glib hash_table != NULL assert Ticket attribute hash table is created only when some attribute exists. If it doesn't, list and get operations were producing glib assert. Patch adds check in attr_get and attr_list so hash table is used only when it has been initialized. 
Signed-off-by: Jan Friesse --- src/attr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/attr.c b/src/attr.c index ad411092..44061e35 100644 --- a/src/attr.c +++ b/src/attr.c @@ -357,6 +357,8 @@ static cmd_result_t attr_get(struct ticket_config *tk, int fd, struct boothc_att * lookup attr * send value */ + if (!tk->attr) + return RLT_NO_SUCH_ATTR; a = (struct geo_attr *)g_hash_table_lookup(tk->attr, msg->attr.name); if (!a) @@ -391,7 +393,9 @@ static cmd_result_t attr_list(struct ticket_config *tk, int fd, struct boothc_at log_error("out of memory"); return RLT_SYNC_FAIL; } - g_hash_table_foreach(tk->attr, append_attr, data); + if (tk->attr) { + g_hash_table_foreach(tk->attr, append_attr, data); + } init_header(&hdr.header, ATTR_LIST, 0, 0, RLT_SUCCESS, 0, sizeof(hdr) + data->len); From 2dba1d97a2b549e69d456987d2fb120b4dbac276 Mon Sep 17 00:00:00 2001 From: Jan Friesse Date: Fri, 6 Oct 2023 10:11:34 +0200 Subject: [PATCH 04/12] tests: Fix Python 3.12 warning Python 3.12 warns about invalid escape sequence '\s'. It is still correctly converted, but I guess it is better to escape '\' character properly to avoid future breakage. Signed-off-by: Jan Friesse --- test/boothtestenv.py.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/boothtestenv.py.in b/test/boothtestenv.py.in index 8a236159..6c6e3d36 100644 --- a/test/boothtestenv.py.in +++ b/test/boothtestenv.py.in @@ -36,7 +36,7 @@ class BoothTestEnvironment(unittest.TestCase, BoothAssertions): # pid for those and only those, which is exactly what we want # here. 
subprocess.call("(netstat -tlnp || ss -tlnp) 2>&1 | " + - "perl -lne '(m,LISTEN\s+(\d+)/boothd, || /\"boothd\".*pid=(\d+)/) and kill 15, $1'", + "perl -lne '(m,LISTEN\\s+(\\d+)/boothd, || /\"boothd\".*pid=(\\d+)/) and kill 15, $1'", shell=True) def get_tempfile(self, identity): From 0fb5fede417de452ef9791852832fd5d53de6d3c Mon Sep 17 00:00:00 2001 From: Jan Friesse Date: Thu, 12 Oct 2023 11:03:20 +0200 Subject: [PATCH 05/12] build: Add gitlog-to-changelog Instead of hand-written changelog export changelog from git during tarball creation. Signed-off-by: Jan Friesse --- ChangeLog | 44 -------- Makefile.am | 15 ++- build-aux/gitlog-to-changelog | 191 ++++++++++++++++++++++++++++++++++ 3 files changed, 204 insertions(+), 46 deletions(-) delete mode 100644 ChangeLog create mode 100755 build-aux/gitlog-to-changelog diff --git a/ChangeLog b/ChangeLog deleted file mode 100644 index 798ca286..00000000 --- a/ChangeLog +++ /dev/null @@ -1,44 +0,0 @@ -* Mar Jan 16 2016 Dejan Muhamedagic and others -- stable release 1.0 -- systemd: add booth-arbitrator.service (bsc#967036) -- main: improve address matching procedure - -* Mon Jan 11 2016 Dejan Muhamedagic and others -- release candidate 1.0 rc1 -- main: prevent segfault on no arguments -- ticket: term 0 is a valid term (bsc#952426) -- main: add 'other' as possible site reference -- arbitrator: mark expired tickets as lost (bsc#956321) -- geo attributes support -- booth-keygen: key generate auxiliary program -- ticket: prevent running external program twice -- ticket: make sure that we're the leader if granting ticket (bsc#940037) -- docs: add booth processing FSM dot graphs -- main: add booth peers command -- main: don't allow zero poll timeout (bsc#938820) -- pcmk: don't log error when ticket not in CIB -- client: fix memory leak in ticket list -- client: fix memory growing indefinitely on new client connect -- extprog: ignore running external program on revoke -- client: make sure that the client is still there to be notified 
-- client: don't allow SIGPIPE to kill the server (if the client leaves too early) -- extprog: preserve child exit status -- extprog: run programs asynchronously -- clients: fix memory leak when removing client -- transport: use non-blocking read -- hmac based authentication support -- ticket: ignore late MY_INDEX requests -- contrib: add geo-cluster.fwd (suse firewall rules) -- ticket: restart elections after last candidate disappears -- ticket: add -C option (wait for commit) -- raft: handle duplicate ticket release requests -- booth-site: exit early in start if daemon cannot start -- raft: ignore messages with invalid term (lower than already committed) -- raft: better control of term increment -- ticket: allow finer resolution time in messages -- use subsecond timers internally -- booth-arbitrator: fix exit codes for all actions -- booth-arbitrator: set the right LSB exit code in status -- booth-arbitrator: fix stop exit code (bnc#914306) -- booth-arbitrator: update exit codes -- main: use /proc/self/oom_score_adj instead of oom_adj (bnc#914037) diff --git a/Makefile.am b/Makefile.am index f06e0885..a95f034a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -43,7 +43,8 @@ EXTRA_DIST = autogen.sh conf/booth.conf.example \ unit-tests \ contrib \ $(SPEC).in booth-rpmlintrc \ - .version build-aux/git-version-gen build-aux/PKG_CHECK_VAR.m4 + .version build-aux/git-version-gen build-aux/PKG_CHECK_VAR.m4 \ + build-aux/gitlog-to-changelog AUTOMAKE_OPTIONS = foreign @@ -126,7 +127,7 @@ BUILT_SOURCES = .version .version: echo $(VERSION) > $@-t && mv $@-t $@ -dist-hook: +dist-hook: gen-ChangeLog echo $(VERSION) > $(distdir)/.tarball-version @@ -234,3 +235,13 @@ srpm: clean rpm: clean $(MAKE) $(SPEC) $(TARFILE) rpmbuild $(RPMBUILDOPTS) -ba $(SPEC) + +gen_start_date = 2000-01-01 +.PHONY: gen-ChangeLog +gen-ChangeLog: + if test -d $(abs_srcdir)/.git; then \ + LC_ALL=C $(top_srcdir)/build-aux/gitlog-to-changelog \ + --since=$(gen_start_date) > $(distdir)/cl-t; \ + rm -f 
$(distdir)/ChangeLog; \ + mv $(distdir)/cl-t $(distdir)/ChangeLog; \ + fi diff --git a/build-aux/gitlog-to-changelog b/build-aux/gitlog-to-changelog new file mode 100755 index 00000000..7660af51 --- /dev/null +++ b/build-aux/gitlog-to-changelog @@ -0,0 +1,191 @@ +eval '(exit $?0)' && eval 'exec perl -wS "$0" ${1+"$@"}' + & eval 'exec perl -wS "$0" $argv:q' + if 0; +# Convert git log output to ChangeLog format. + +my $VERSION = '2009-10-30 13:46'; # UTC +# The definition above must lie within the first 8 lines in order +# for the Emacs time-stamp write hook (at end) to update it. +# If you change this file with Emacs, please let the write hook +# do its job. Otherwise, update this string manually. + +# Copyright (C) 2008-2010 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +# Written by Jim Meyering + +use strict; +use warnings; +use Getopt::Long; +use POSIX qw(strftime); + +(my $ME = $0) =~ s|.*/||; + +# use File::Coda; # http://meyering.net/code/Coda/ +END { + defined fileno STDOUT or return; + close STDOUT and return; + warn "$ME: failed to close standard output: $!\n"; + $? ||= 1; +} + +sub usage ($) +{ + my ($exit_code) = @_; + my $STREAM = ($exit_code == 0 ? 
*STDOUT : *STDERR); + if ($exit_code != 0) + { + print $STREAM "Try `$ME --help' for more information.\n"; + } + else + { + print $STREAM < ChangeLog + $ME -- -n 5 foo > last-5-commits-to-branch-foo + +EOF + } + exit $exit_code; +} + +# If the string $S is a well-behaved file name, simply return it. +# If it contains white space, quotes, etc., quote it, and return the new string. +sub shell_quote($) +{ + my ($s) = @_; + if ($s =~ m![^\w+/.,-]!) + { + # Convert each single quote to '\'' + $s =~ s/\'/\'\\\'\'/g; + # Then single quote the string. + $s = "'$s'"; + } + return $s; +} + +sub quoted_cmd(@) +{ + return join (' ', map {shell_quote $_} @_); +} + +{ + my $since_date = '1970-01-01 UTC'; + my $format_string = '%s%n%b%n'; + GetOptions + ( + help => sub { usage 0 }, + version => sub { print "$ME version $VERSION\n"; exit }, + 'since=s' => \$since_date, + 'format=s' => \$format_string, + ) or usage 1; + + my @cmd = (qw (git log --log-size), "--since=$since_date", + '--pretty=format:%ct %an <%ae>%n%n'.$format_string, @ARGV); + open PIPE, '-|', @cmd + or die ("$ME: failed to run `". quoted_cmd (@cmd) ."': $!\n" + . "(Is your Git too old? Version 1.5.1 or later is required.)\n"); + + my $prev_date_line = ''; + while (1) + { + defined (my $in = ) + or last; + $in =~ /^log size (\d+)$/ + or die "$ME:$.: Invalid line (expected log size):\n$in"; + my $log_nbytes = $1; + + my $log; + my $n_read = read PIPE, $log, $log_nbytes; + $n_read == $log_nbytes + or die "$ME:$.: unexpected EOF\n"; + + my @line = split "\n", $log; + my $author_line = shift @line; + defined $author_line + or die "$ME:$.: unexpected EOF\n"; + $author_line =~ /^(\d+) (.*>)$/ + or die "$ME:$.: Invalid line " + . "(expected date/author/email):\n$author_line\n"; + + my $date_line = sprintf "%s $2\n", strftime ("%F", localtime ($1)); + # If this line would be the same as the previous date/name/email + # line, then arrange not to print it. 
+ if ($date_line ne $prev_date_line) + { + $prev_date_line eq '' + or print "\n"; + print $date_line; + } + $prev_date_line = $date_line; + + # Omit "Signed-off-by..." lines. + @line = grep !/^Signed-off-by: .*>$/, @line; + + # If there were any lines + if (@line == 0) + { + warn "$ME: warning: empty commit message:\n $date_line\n"; + } + else + { + # Remove leading and trailing blank lines. + while ($line[0] =~ /^\s*$/) { shift @line; } + while ($line[$#line] =~ /^\s*$/) { pop @line; } + + # Prefix each non-empty line with a TAB. + @line = map { length $_ ? "\t$_" : '' } @line; + + print "\n", join ("\n", @line), "\n"; + } + + defined ($in = ) + or last; + $in ne "\n" + and die "$ME:$.: unexpected line:\n$in"; + } + + close PIPE + or die "$ME: error closing pipe from " . quoted_cmd (@cmd) . "\n"; + # FIXME-someday: include $PROCESS_STATUS in the diagnostic +} + +# Local Variables: +# mode: perl +# indent-tabs-mode: nil +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "my $VERSION = '" +# time-stamp-format: "%:y-%02m-%02d %02H:%02M" +# time-stamp-time-zone: "UTC" +# time-stamp-end: "'; # UTC" +# End: From f0bbece8c82311fe90230a7f6b90f61ddff0f420 Mon Sep 17 00:00:00 2001 From: Jan Friesse Date: Thu, 12 Oct 2023 11:25:34 +0200 Subject: [PATCH 06/12] build: Add release.mk Makefile used to tag and create official tarballs. 
Signed-off-by: Jan Friesse --- Makefile.am | 2 +- build-aux/release.mk | 55 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 build-aux/release.mk diff --git a/Makefile.am b/Makefile.am index a95f034a..22b8a655 100644 --- a/Makefile.am +++ b/Makefile.am @@ -44,7 +44,7 @@ EXTRA_DIST = autogen.sh conf/booth.conf.example \ contrib \ $(SPEC).in booth-rpmlintrc \ .version build-aux/git-version-gen build-aux/PKG_CHECK_VAR.m4 \ - build-aux/gitlog-to-changelog + build-aux/gitlog-to-changelog build-aux/release.mk AUTOMAKE_OPTIONS = foreign diff --git a/build-aux/release.mk b/build-aux/release.mk new file mode 100644 index 00000000..355442ba --- /dev/null +++ b/build-aux/release.mk @@ -0,0 +1,55 @@ +# to build official release tarballs, handle tagging and publish. + +# example: +# make -f build-aux/release.mk all version=1.1 release=yes + +project=booth + +all: checks setup tag tarballs sha256 + +checks: +ifeq (,$(version)) + @echo ERROR: need to define version= + @exit 1 +endif + @if [ ! -d .git ]; then \ + echo This script needs to be executed from top level cluster git tree; \ + exit 1; \ + fi + + @if ! 
grep "fallback $(version)" configure.ac > /dev/null; then \ + echo "Don't forget update fallback version in configure.ac before release"; \ + exit 1; \ + fi + +setup: checks + ./autogen.sh + ./configure --without-glue + make maintainer-clean + +tag: setup ./tag-$(version) + +tag-$(version): +ifeq (,$(release)) + @echo Building test release $(version), no tagging +else + git tag -a -m "v$(version) release" v$(version) HEAD + @touch $@ +endif + +tarballs: tag + ./autogen.sh + ./configure --without-glue + BOOTH_RUNTESTS_ROOT_USER=1 make distcheck DISTCHECK_CONFIGURE_FLAGS="--without-glue" + +sha256: tarballs $(project)-$(version).sha256 + +$(project)-$(version).sha256: +ifeq (,$(release)) + @echo Building test release $(version), no sha256 +else + sha256sum $(project)-$(version)*tar* | sort -k2 > $@ +endif + +clean: + rm -rf $(project)-* tag-* From 6a3a0716340b34de45a68a85b41c813d2c98c4f5 Mon Sep 17 00:00:00 2001 From: Jan Friesse Date: Mon, 16 Oct 2023 17:56:47 +0200 Subject: [PATCH 07/12] build: Include icons in release tarballs Signed-off-by: Jan Friesse --- Makefile.am | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile.am b/Makefile.am index 22b8a655..19d654c3 100644 --- a/Makefile.am +++ b/Makefile.am @@ -42,6 +42,7 @@ EXTRA_DIST = autogen.sh conf/booth.conf.example \ test/utils.py \ unit-tests \ contrib \ + icons \ $(SPEC).in booth-rpmlintrc \ .version build-aux/git-version-gen build-aux/PKG_CHECK_VAR.m4 \ build-aux/gitlog-to-changelog build-aux/release.mk From f31669330ce73f3bbea6f99afec77d434da49524 Mon Sep 17 00:00:00 2001 From: Jan Friesse Date: Wed, 18 Oct 2023 11:31:55 +0200 Subject: [PATCH 08/12] build: Make distcheck work for non-root user make distcheck calls configure script with --prefix to allow installation to tmp directory. ocfdir is not using ${prefix} and instead contains absolute path (either taken from resource-agents.pc or hardcoded one) so make install fails when running as non-root user. 
Solution is taken from pacemaker project and it relies on setting AM_DISTCHECK_CONFIGURE_FLAGS so --with-ocfdir is added with directory where user has write permissions. Big thanks to Fabio M. Di Nitto for finding this solution. Signed-off-by: Jan Friesse --- Makefile.am | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile.am b/Makefile.am index 19d654c3..acd4c1e2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -54,6 +54,9 @@ MAINTAINERCLEANFILES = Makefile.in aclocal.m4 configure depcomp \ autoheader automake autoconf test_lense.sh \ compile +# Don't try to install files outside build directory for "make distcheck". +AM_DISTCHECK_CONFIGURE_FLAGS = --with-ocfdir="$$dc_install_base/lib/ocf" + dist_doc_DATA = AUTHORS README COPYING README.upgrade-from-v0.1 README-testing boothconfdir = ${BOOTHSYSCONFDIR} From e4fb8dae3a590a3d08eb2a132502b1852e42e550 Mon Sep 17 00:00:00 2001 From: Jan Friesse Date: Wed, 18 Oct 2023 15:41:05 +0200 Subject: [PATCH 09/12] build: Prepare version 1.1 release Signed-off-by: Jan Friesse --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index e8153377..c2ea2d76 100644 --- a/configure.ac +++ b/configure.ac @@ -5,7 +5,7 @@ AC_PREREQ([2.69]) AC_INIT([booth], - [m4_esyscmd([build-aux/git-version-gen --fallback 1.0 .tarball-version .gitarchivever])], + [m4_esyscmd([build-aux/git-version-gen --fallback 1.1 .tarball-version .gitarchivever])], [users@clusterlabs.org]) AC_USE_SYSTEM_EXTENSIONS From ad61e2c5bf277a4d4e0158a2bcc65d76a4eb9321 Mon Sep 17 00:00:00 2001 From: Dejan Muhamedagic Date: Sat, 14 Oct 2023 11:52:25 +0200 Subject: [PATCH 10/12] Feature: crmv1: make booth work like a crmv1-style cluster --- Makefile.am | 2 +- README.crmv1 | 49 +++++++++++ conf/booth.conf.example | 4 + conf/crmv1.conf.example | 15 ++++ script/crmv1 | 116 ++++++++++++++++++++++++++ src/booth.h | 4 +- src/config.c | 178 ++++++++++++++++++++++++++++++++++++++++ src/config.h | 2 + src/handler.c | 73 
+++++++++------- 9 files changed, 409 insertions(+), 34 deletions(-) create mode 100644 README.crmv1 create mode 100644 conf/crmv1.conf.example create mode 100755 script/crmv1 diff --git a/Makefile.am b/Makefile.am index acd4c1e2..883d2f56 100644 --- a/Makefile.am +++ b/Makefile.am @@ -34,7 +34,7 @@ SPEC = $(PACKAGE_NAME).spec TARFILE = $(PACKAGE_NAME)-$(VERSION).tar.gz EXTRA_DIST = autogen.sh conf/booth.conf.example \ - script/booth-keygen script/lsb script/ocf script/service-runnable.in \ + script/booth-keygen script/lsb script/ocf script/service-runnable.in script/crmv1.in \ script/unit-test.py.in script/wireshark-dissector.lua \ test/arbtests.py test/assertions.py test/booth_path test/boothrunner.py \ test/boothtestenv.py.in test/clientenv.py test/clienttests.py test/live_test.sh \ diff --git a/README.crmv1 b/README.crmv1 new file mode 100644 index 00000000..b2708a2d --- /dev/null +++ b/README.crmv1 @@ -0,0 +1,49 @@ +CRMv1 cluster +============= + +Heartbeat is a predecessor to Pacemaker and here we make a +comeback to that kind of clustering. Why should we do that? +Firstly, Pacemaker became a behemoth, something that can brew +your coffee, but also something that is rather unwieldy and +difficult to manage. Secondly, booth is a very reliable +distributed engine and in our testing it was used also in a +typical LAN and passed all the tests with flying colours. So, +this is something for people who don't need all the bells and +whistles of Pacemaker, but still want to have HA. + +STONITH is missing, but the cluster must have at least three +members. Hence, the booth arbitrator serves as a fencing +replacement. This is as it should be: a two node cluster is +indeed very difficult to run. The booth arbitrator can be a +smallish instance running anywhere in your network. As with +fencing, it doesn't even have to be particularly reliable, it +just has to be there when we need it. + +Setup +----- + +Just like with heartbeat, CRMv1 in booth is very simple to set up.
+There is a helper program called `crmv1` which is going to handle +all the details. In the simplest setup, which is anyway the most +common, there is just one group. The resources are run in order, +there is no parallelism. + +Here is the usage with one realistic example: + + Usage: crmv1 {group ...|group delete } + + Examples: + + crmv1 group bigdb \ + IPaddr ip=192.168.1.1 \ + ocf:linbit:drbd drbd_resource=bigdisk \ + Filesystem device=/dev/bigdisk directory=/bigdisk fstype=xfs \ + oracle sid=bigdb + + crmv1 group delete bigdb + +There is no monitoring of resources, but it is easy to run an +external monitor of the topmost resource, i.e. the service which +is actually used by the users. If that monitor fails, then it +makes sense to move the group to the other node. + diff --git a/conf/booth.conf.example b/conf/booth.conf.example index 1d63547c..440046a5 100644 --- a/conf/booth.conf.example +++ b/conf/booth.conf.example @@ -25,3 +25,7 @@ ticket="ticketA" ticket="ticketB" expire = 600 weights = 1,2,3 + +# Use the CRMv1 feature, i.e. make the booth a cluster in its own +# right (run resources, etc.) +crmv1 diff --git a/conf/crmv1.conf.example b/conf/crmv1.conf.example new file mode 100644 index 00000000..e1082657 --- /dev/null +++ b/conf/crmv1.conf.example @@ -0,0 +1,15 @@ +# The crmv1 configuration file is "/etc/booth/crmv1/conf". You need to +# prepare the same configuration file on each arbitrator and +# each node in the cluster sites where the booth daemon can be launched. + +# The configuration consists of group definitions with parameters for resources. +# It is recommended to use the crmv1 program to prepare this +# configuration file.
+# Here is one example: + +group bigdb \ + IPaddr ip=192.168.1.1 \ + ocf:linbit:drbd drbd_resource=bigdisk \ + Filesystem device=/dev/bigdisk directory=/bigdisk fstype=xfs \ + oracle sid=bigdb + diff --git a/script/crmv1 b/script/crmv1 new file mode 100755 index 00000000..b9b04163 --- /dev/null +++ b/script/crmv1 @@ -0,0 +1,116 @@ +#!/bin/bash +# +# This is crmv1, a tool to configure booth as a crmv1 style +# cluster. +# It basically manages groups. There is no concept of a group in +# booth, but we can get by by using the before-acquire-handler. +# Essentially, the handler is used to run programs (resource +# agents). Just how the resource agents are configured is another +# matter. +# + +CONF_DIR=/etc/booth + +cnt=0 + +usage() { + cat<&2 + +Usage: $0 {group ...|group delete } + +Examples: + + crmv1 group bigdb \\ + IPaddr ip=192.168.1.1 \\ + ocf:linbit:drbd drbd_resource=bigdisk \\ + Filesystem device=/dev/bigdisk directory=/bigdisk fstype=xfs \\ + oracle sid=bigdb + + crmv1 group delete bigdb + +EOF + exit $1 +} +fatal() { + cat<&2 + +FATAL: $* + +EOF + exit 1 +} + +add_group() { + mkdir -p $CONF_DIR/crmv1/$2 + echo "$@" >> $CONF_DIR/crmv1/conf +} + +del_group() { + rm -rf $CONF_DIR/crmv1/$1 + sed -i "/group $1/d" $CONF_DIR/crmv1/conf +} + +get_ra() { + local ra + ra=$1 + set `echo $ra | sed 's/:/ /g'` + if [ $# -eq 1 ]; then + dir=/usr/lib/ocf/resource.d/heartbeat + else + # 1:2:3 + dir=/usr/lib/ocf/resource.d/$1/$2 + ra=$3 + fi + if [ -f $dir/$ra ]; then + echo $dir/$ra + else + fatal "no resource agent $1, did you install resource-agents?" 
+ fi +} + +mk_link() { + ln -fs $2 $CONF_DIR/crmv1/$1/`printf '%02d' $3`_`basename $2` +} +ln_ra() { + ra_f=`get_ra $2` + mk_link $1 $ra_f $cnt + cnt=$((cnt+1)) +} + +# this is not really creating a group, we just parse the input to +# make sure that the group is well defined; the group is then +# created by boothd on starting; consider this a document on how +# creating a group should be implemented +new_group() { + group=$2 + shift 2 + for p; do + save_ra=$p + if echo $p | grep -qs '='; then + args="$args $p" + else + if [ "$save_ra" ]; then + ln_ra $group $save_ra + save_ra='' + continue + fi + fi + ln_ra $group $p + done + add_group group $group $@ +} + +if [ $# -lt 3 ]; then + usage 1 +fi +if [ $1 != group ]; then + usage 1 +fi +if [ $2 != delete ]; then + if grep -qs "^group $2" $CONF_DIR/crmv1/conf; then + fatal "group $2 already exists" + fi + new_group $@ +else + del_group $3 +fi diff --git a/src/booth.h b/src/booth.h index 0cd43c00..65824d3b 100644 --- a/src/booth.h +++ b/src/booth.h @@ -38,6 +38,7 @@ #define BOOTH_DEFAULT_CONF_EXT ".conf" #define BOOTH_DEFAULT_CONF \ BOOTH_DEFAULT_CONF_DIR BOOTH_DEFAULT_CONF_NAME BOOTH_DEFAULT_CONF_EXT +#define BOOTH_DEFAULT_CRMV1_CONF BOOTH_DEFAULT_CONF_DIR "crmv1/conf" #define DAEMON_NAME "boothd" #define BOOTH_PATH_LEN PATH_MAX @@ -380,7 +381,4 @@ extern struct command_line cl; _a > _b ? 
_a : _b; }) - - - #endif /* _BOOTH_H */ diff --git a/src/config.c b/src/config.c index f0ca4aa9..12d2a2f9 100644 --- a/src/config.c +++ b/src/config.c @@ -538,6 +538,178 @@ static int parse_attr_prereq(char *val, struct ticket_config *tk) extern int poll_timeout; +void +get_keyval(char *key, char *val, struct args *a) { + char *p; + + strncpy(a->key, key, 16); + p = skip_while(val, isspace); + *(p-1) = '\0'; + strncpy(a->val, val, 16); +} + +struct crmv1_group { + char name[16]; + char ra[128]; + struct args { + char *key[16]; + char *val[16]; + } args[16]; +}; + +#define OCF_HB_PATH "/usr/lib/ocf/resource.d/heartbeat/" +#define OCF_PATH "/usr/lib/ocf/resource.d/" + +void ln_ra(char *ra, char *s, int cnt) +{ + char *p, *q, *r; + int fd; + char ra_target_s[128]; + + p = s; + q = strchr(":", s); + if (!q) { + strcpy(ra, OCF_HB_PATH); + strncpy(ra+strlen(OCF_HB_PATH), s, 128-strlen(OCF_HB_PATH)); + r = s; + } else { + /* s -> p ':' q ':' r + * copy to ra + */ + *q = '\0'; q++; + strcpy(ra, OCF_PATH); + strncpy(ra+strlen(OCF_PATH), p, 128-strlen(OCF_PATH)); + *(q-p+1) = '/'; + strncpy(ra+strlen(OCF_PATH)+1, q, 128-strlen(OCF_PATH)-strlen(q)); + r = strchr(":", q); + *r = '\0'; r++; + strncat(ra, r, 128-strlen(OCF_PATH)); + } + if (strlen(p) >= 128) { + log_error("RA name too long: %s", s); + exit(1); + } + /* now test if there is a file containing this RA + */ + if (!(fd = open(ra))) { + log_error("RA does not exist: %s", s); + exit(1); + } + close(fd); + /* finally, create a soft link + */ + if (snprintf(ra_target_s, 128, "%02d_%s", cnt, r) >= 128) { + log_error("RA name too long: %s", s); + exit(1); + } + if (symlink(BOOTH_DEFAULT_CRMV1_CONF, ra_target_s) != 0) { + log_error("failed to symlink %s: %s", ra_target_s, + strerror(errno)); + exit(1); + } +} + +/* mimic the shell parsing + */ + +int parse_crmv1_conf(struct ticket_config *current_tk) +{ + struct crmv1_group *groups[16], *curr_group; + char line[1024], *buf; + char error_str_buf[1024]; + FILE *fp; + char *s, 
*key, *val; + const char *error; + char *save_ra; + int i, grp_i = 0, key_i = 0, in_key, grp_wait; + int cnt = 0, args_cnt = 0; + + curr_group = groups[0]; + fp = fopen(BOOTH_DEFAULT_CRMV1_CONF, "r"); + if (!fp) { + log_error("failed to open %s: %s", BOOTH_DEFAULT_CRMV1_CONF, + strerror(errno)); + return -1; + } + + log_debug("reading config file %s", BOOTH_DEFAULT_CRMV1_CONF); + /* make one long line */ + while (fgets(line, sizeof(line), fp)) { + s = skip_while(line, isspace); + if (is_end_of_line(s) || *s == '#') + continue; + /* is line continued? */ + if (*(s+strlen(s)-2) == '\\' && *(s+strlen(s)-1) == '\n') { + *(s+strlen(s)-2) = ' '; + } + } + buf = line; + + /* now parse the line */ + for (s = buf; ; ) { + /* a '=' b or ra */ + s = skip_while(s, isspace); + save_ra = s; + if ( *s == '=' ) { + *s = '\0'; + s++; + get_keyval(save_ra, s, curr_group->args[args_cnt++]); + } else { + if ( save_ra ) { + ln_ra(curr_group->ra, save_ra, cnt); + curr_group->args[0] = NULL; + save_ra = NULL; + cnt++; + continue; + } + } + ln_ra(curr_group->ra, s, cnt); + cnt++; + + if (strcmp(key, "group") == 0) { + grp_wait = 1; + continue; + } + + (void)snprintf(error_str_buf, sizeof(error_str_buf), + "Unknown keyword \"%s\"", key); + error = error_str_buf; + goto err; + + curr_group++; + } + fclose(fp); + + /* Default: make config name match config filename. */ + if (!booth_conf->name[0]) { + cp = strrchr(path, '/'); + cp = cp ? 
cp+1 : (char *)path; + cp2 = strrchr(cp, '.'); + if (!cp2) + cp2 = cp + strlen(cp); + if (cp2-cp >= BOOTH_NAME_LEN) { + log_error("token too long"); + goto out; + } + strncpy(booth_conf->name, cp, cp2-cp); + *(booth_conf->name+(cp2-cp)) = '\0'; + } + + if (!postproc_ticket(current_tk)) { + goto out; + } + + return 0; + +err: + fclose(fp); +out: + log_error("%s in config file line %d", + error, lineno); + booth_conf->crmv1 = 0; + return -1; +} + int read_config(const char *path, int type) { char line[1024]; @@ -787,6 +959,12 @@ int read_config(const char *path, int type) continue; } + if (strcmp(key, "crmv1") == 0) { + if ( !parse_crmv1_conf() ) + booth_conf->crmv1 = 1; + continue; + } + /* current_tk must be allocated at this point, otherwise * we don't know to which ticket the key refers */ diff --git a/src/config.h b/src/config.h index bca73bc7..834aa4e1 100644 --- a/src/config.h +++ b/src/config.h @@ -22,6 +22,7 @@ #include #include +#include #include "booth.h" #include "timer.h" #include "raft.h" @@ -321,6 +322,7 @@ struct booth_config { int ticket_count; int ticket_allocated; struct ticket_config *ticket; + int crmv1; }; extern struct booth_config *booth_conf; diff --git a/src/handler.c b/src/handler.c index a12857eb..2f6afbcd 100644 --- a/src/handler.c +++ b/src/handler.c @@ -64,17 +64,48 @@ closefiles(void) } } +static void +wait4proc(struct ticket_config *tk, char *prog) { + int rv, status; + + while (waitpid(curr_pid, &status, 0) != curr_pid) + ; + curr_pid = 0; + if (!ignore_status) { + rv = test_exit_status(tk, prog, status, 1); + if (rv) + _exit(rv); + } else { + /* + * To make ignore_rest function signal safe log_info + * must be removed from signal function. Information + * about signal delivery is important so put it here. 
+ */ + log_info("external programs handler caught TERM, ignoring " + "status of external test programs"); + } + static void run_ext_prog(struct ticket_config *tk, char *prog) { - if (set_booth_env(tk)) { + int status, rv; + + switch(curr_pid=fork()) { + case -1: + log_error("fork: %s", strerror(errno)); _exit(1); + case 0: /* child */ + if (set_booth_env(tk)) { + _exit(1); + } + closefiles(); /* don't leak open files */ + tk_log_debug("running handler %s", prog); + execv(prog, tk_test.argv); + tk_log_error("%s: execv failed (%s)", prog, strerror(errno)); + _exit(1); + default: /* parent */ + wait4proc(struct ticket_config *tk, char *prog); } - closefiles(); /* don't leak open files */ - tk_log_debug("running handler %s", prog); - execv(prog, tk_test.argv); - tk_log_error("%s: execv failed (%s)", prog, strerror(errno)); - _exit(1); } static int @@ -214,30 +245,9 @@ process_ext_dir(struct ticket_config *tk) strcpy(prog, tk_test.path); strcat(prog, "/"); strcat(prog, dp->d_name); - switch(curr_pid=fork()) { - case -1: - log_error("fork: %s", strerror(errno)); - _exit(1); - case 0: /* child */ - run_ext_prog(tk, prog); - break; /* run_ext_prog effectively noreturn */ - default: /* parent */ - while (waitpid(curr_pid, &status, 0) != curr_pid) - ; - curr_pid = 0; - if (!ignore_status) { - rv = test_exit_status(tk, prog, status, 1); - if (rv) - _exit(rv); - } else { - /* - * To make ignore_rest function signal safe log_info - * must be removed from signal function. Information - * about signal delivery is important so put it here. 
- */ - log_info("external programs handler caught TERM, ignoring " - "status of external test programs"); - } + run_ext_prog(tk, prog); + if (booth_conf->crmv1) { + wait4proc(struct ticket_config *tk, char *prog); } } _exit(0); @@ -277,6 +287,9 @@ int run_handler(struct ticket_config *tk) tk_test.pid = pid; set_progstate(tk, EXTPROG_RUNNING); rv = RUNCMD_MORE; /* program runs */ + if (booth_conf->crmv1) { + wait4proc(struct ticket_config *tk, char *prog); + } } return rv; From 7bf1855d08b21e2931b58068743ef60ecf80991f Mon Sep 17 00:00:00 2001 From: Dejan Muhamedagic Date: Sun, 22 Oct 2023 18:19:32 +0200 Subject: [PATCH 11/12] add tests --- test/live_test.sh | 41 +++++++++++++++++++++++++++++++++- unit-tests/030_crmv1.txt | 48 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 unit-tests/030_crmv1.txt diff --git a/test/live_test.sh b/test/live_test.sh index bd60964d..485a32d3 100755 --- a/test/live_test.sh +++ b/test/live_test.sh @@ -1163,6 +1163,44 @@ applicable_attr_prereq_fail() { [ -n "`get_attr`" ] } +## TEST: crmv1_group_start ## + +add_crmv1_group() { + crmv1 group testgrp rsc1 Dummy rsc2 Dummy fake=test +} + +rm_crmv1_group() { + crmv1 group delete testgrp +} + +check_resources() { + export OCF_ROOT=/usr/lib/ocf + export OCF_RESOURCE_INSTANCE=rsc1 + . 
/usr/lib/ocf/lib/heartbeat/ocf-shellfuncs + /usr/lib/ocf/resource.d/heartbeat/Dummy monitor || return 1 + OCF_RESOURCE_INSTANCE=rsc2 + export OCF_RESKEY_fake=test + /usr/lib/ocf/resource.d/heartbeat/Dummy monitor || return 1 + return 0 +} + +# crmv1 start a group +setup_crmv1_group_start_ok() { + add_crmv1_group +} +test_crmv1_group_start_ok() { + wait_exp + wait_timeout +} +check_crmv1_group_start_ok() { + check_resources +} +recover_crmv1_group_start_ok() { + stop_site `get_site 1` + stop_site `get_site 2` + rm_crmv1_group +} + # # environment modifications # @@ -1251,7 +1289,8 @@ grant_site_lost grant_site_reappear revoke simultaneous_start_even slow_start_granted restart_granted reload_granted restart_granted_nocib restart_notgranted failover split_leader split_follower split_edge -external_prog_failed attr_prereq_ok attr_prereq_fail"} +external_prog_failed attr_prereq_ok attr_prereq_fail +crmv1_group_start_ok"} : ${MANUAL_TESTS:="grant longgrant grant_noarb grant_elsewhere grant_site_lost diff --git a/unit-tests/030_crmv1.txt b/unit-tests/030_crmv1.txt new file mode 100644 index 00000000..42e6ed18 --- /dev/null +++ b/unit-tests/030_crmv1.txt @@ -0,0 +1,48 @@ +# vim: ft=sh et : +# +# Testing crmv1 groups + + +ticket: + name "tick1" + state ST_LEADER + current_term 40 + leader local + # may keep ticket all the time + term_duration 3000 + # but shall start renewal now + term_expires time(0) + 1000 + req_sent_at time(0) - 10 + + +gdb0: + call parse_extprog("test `set|grep ^BOOTH|wc -l` -ge 5", booth_conf->ticket+0) + +outgoing0: + header.cmd OP_HEARTBEAT + + +testgrp: + call parse_extprog("bin/crmv1", booth_conf->ticket+0) + ext_verifier 'bin/crmv1' + # cause re-query of the verifier + req_sent_at time(0) - 10 + +# +#gdb1: +# break ticket_broadcast_proposed_state § commands § bt § c § end + + +outgoing1: + header.cmd OP_HEARTBEAT + + +# now say that we may not have it anymore.
+ticket2: + ext_verifier 'test "$BOOTH_TICKET" == "tick2FOO"' + # cause re-query of the verifier + req_sent_at time(0) - 10 + +finally: + state ST_LEADER + leader local From 72ece9763ad06c683ac52d0506d3c8bac26dc31f Mon Sep 17 00:00:00 2001 From: Dejan Muhamedagic Date: Mon, 23 Oct 2023 17:46:08 +0200 Subject: [PATCH 12/12] add the script for unit tests --- unit-tests/bin/checkcrmv1 | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100755 unit-tests/bin/checkcrmv1 diff --git a/unit-tests/bin/checkcrmv1 b/unit-tests/bin/checkcrmv1 new file mode 100755 index 00000000..94396cfa --- /dev/null +++ b/unit-tests/bin/checkcrmv1 @@ -0,0 +1,39 @@ +#!/bin/sh + +add_crmv1_group() { + crmv1 group testgrp rsc1 Dummy rsc2 Dummy fake=test +} + +rm_crmv1_group() { + crmv1 group delete testgrp +} + +check_resources() { + export OCF_ROOT=/usr/lib/ocf + export OCF_RESOURCE_INSTANCE=rsc1 + . /usr/lib/ocf/lib/heartbeat/ocf-shellfuncs + /usr/lib/ocf/resource.d/heartbeat/Dummy monitor || return 1 + OCF_RESOURCE_INSTANCE=rsc2 + export OCF_RESKEY_fake=test + /usr/lib/ocf/resource.d/heartbeat/Dummy monitor || return 1 + return 0 +} + +# crmv1 start a group +setup_crmv1_group_start_ok() { + add_crmv1_group +} +test_crmv1_group_start_ok() { + wait_exp + wait_timeout +} +check_crmv1_group_start_ok() { + check_resources +} +recover_crmv1_group_start_ok() { + stop_site `get_site 1` + stop_site `get_site 2` + rm_crmv1_group +} + +check_crmv1_group_start_ok