Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions lib/dynflow/executors/sidekiq/redis_locking.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,7 @@ def release_orchestrator_lock
def wait_for_orchestrator_lock
mode = nil
loop do
active = ::Sidekiq.redis do |conn|
conn.set(REDIS_LOCK_KEY, @world.id, :ex => REDIS_LOCK_TTL, :nx => true)
end
active = try_acquire_orchestrator_lock
break if active
if mode.nil?
mode = :passive
Expand All @@ -54,6 +52,15 @@ def wait_for_orchestrator_lock
@logger.info('Acquired orchestrator lock, entering active mode.')
end

# Makes one attempt to take the orchestrator lock in Redis.
#
# Issues a SET on REDIS_LOCK_KEY with this world's id as the value, passing
# the +nx+ flag and REDIS_LOCK_TTL as the +ex+ expiry. A Redis-level failure
# is logged and reported as "not acquired" instead of being raised.
#
# @return [Object, nil] whatever the connection's +set+ call returned, or
#   +nil+ when a ::Redis::BaseError was raised during the attempt
def try_acquire_orchestrator_lock
  begin
    ::Sidekiq.redis { |redis| redis.set(REDIS_LOCK_KEY, @world.id, ex: REDIS_LOCK_TTL, nx: true) }
  rescue ::Redis::BaseError => error
    # Only Redis errors are swallowed; anything else still propagates.
    @logger.error("Could not acquire orchestrator lock: #{error}")
    nil
  end
end

def reacquire_orchestrator_lock
case ::Sidekiq.redis { |conn| conn.eval REACQUIRE_SCRIPT, [REDIS_LOCK_KEY], [@world.id] }
when ACQUIRE_MISSING
Expand Down
62 changes: 61 additions & 1 deletion test/bats/sidekiq-orchestrator.bats
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ teardown() {
cd "$TEST_PIDDIR" || return 1
shopt -s nullglob
for pidfile in * ; do
kill -15 "$(cat "$pidfile")"
kill -9 "$(cat "$pidfile")"
done
)
cleanup_containers 1
Expand Down Expand Up @@ -116,3 +116,63 @@ teardown() {
timeout 30 bundle exec ruby examples/remote_executor.rb client 1
wait_for 1 1 grep -P 'dynflow: ExecutionPlan.*running >>.*stopped' "$(bg_output_file o1)"
}

# Scenario: a single orchestrator holds the lock, Redis is stopped and started
# again, and the orchestrator must still drive an execution plan afterwards.
# NOTE(review): run_background, wait_for, bg_output_file, stop_redis and
# start_redis are helpers defined outside this view; wait_for's two numeric
# arguments are presumably a retry count and a delay — confirm in the helpers.
@test "active orchestrator can survive a brief redis connection drop" {
cd "$(get_project_root)"

# Start orchestrator 'o1' and wait until its output reports the lock was acquired.
run_background 'o1' bundle exec sidekiq -r ./examples/remote_executor.rb -q dynflow_orchestrator -c 1
wait_for 30 1 grep 'dynflow: Acquired orchestrator lock, entering active mode.' "$(bg_output_file o1)"

# Start worker 'w1'; the readiness line is looked for in o1's output, not w1's.
run_background 'w1' bundle exec sidekiq -r ./examples/remote_executor.rb -q default
wait_for 5 1 grep 'dynflow: Finished performing validity checks' "$(bg_output_file o1)"

# Take Redis down, wait until o1 logs the connection error, then bring it back.
# stop_redis is called without an argument here, unlike the other outage tests
# (presumably the shorter outage variant — confirm against the helper).
stop_redis
wait_for 30 1 grep 'Error connecting to Redis' "$(bg_output_file o1)"
start_redis

# Run the client and require a running -> stopped plan transition in o1's output.
timeout 10 bundle exec ruby examples/remote_executor.rb client 1
wait_for 1 1 grep -P 'dynflow: ExecutionPlan.*running >>.*stopped' "$(bg_output_file o1)"
}

# Scenario: as in the brief-drop test, but the orchestrator is additionally
# expected to report that it lost the lock and reacquired it once Redis returns.
# NOTE(review): run_background, wait_for, bg_output_file, stop_redis and
# start_redis are helpers defined outside this view.
@test "active orchestrator can survive a longer redis connection drop" {
cd "$(get_project_root)"

# Start orchestrator 'o1' and wait until its output reports the lock was acquired.
run_background 'o1' bundle exec sidekiq -r ./examples/remote_executor.rb -q dynflow_orchestrator -c 1
wait_for 30 1 grep 'dynflow: Acquired orchestrator lock, entering active mode.' "$(bg_output_file o1)"

# Start worker 'w1'; the readiness line is looked for in o1's output.
run_background 'w1' bundle exec sidekiq -r ./examples/remote_executor.rb -q default
wait_for 5 1 grep 'dynflow: Finished performing validity checks' "$(bg_output_file o1)"

# Stop Redis with argument 1 (meaning not visible here; presumably it makes the
# lock disappear or expire during the outage — confirm against the helper),
# wait until o1 notices the outage, then start Redis again.
stop_redis 1
wait_for 30 1 grep 'Error connecting to Redis' "$(bg_output_file o1)"
start_redis

# o1 must log that the lock was lost and that it took it back.
wait_for 30 1 grep 'The orchestrator lock was lost, reacquired' "$(bg_output_file o1)"

# Run the client and require a running -> stopped plan transition in o1's output.
timeout 10 bundle exec ruby examples/remote_executor.rb client 1
wait_for 1 1 grep -P 'dynflow: ExecutionPlan.*running >>.*stopped' "$(bg_output_file o1)"
}

# Scenario: two orchestrators (o1 active, o2 passive). Redis is stopped, o1 is
# terminated while Redis is down, and after Redis returns o2 must take the lock
# and be able to drive an execution plan.
# NOTE(review): run_background, wait_for, bg_output_file, stop_redis and
# start_redis are helpers defined outside this view.
@test "orchestrators can fail over if active one goes away during downtime" {
cd "$(get_project_root)"

# First orchestrator: wait until its output reports the lock was acquired.
run_background 'o1' bundle exec sidekiq -r ./examples/remote_executor.rb -q dynflow_orchestrator -c 1
wait_for 30 1 grep 'dynflow: Acquired orchestrator lock, entering active mode.' "$(bg_output_file o1)"

# Second orchestrator: must find the lock taken and report passive mode.
run_background 'o2' bundle exec sidekiq -r ./examples/remote_executor.rb -q dynflow_orchestrator -c 1
wait_for 30 1 grep 'dynflow: Orchestrator lock already taken, entering passive mode.' "$(bg_output_file o2)"

# Start worker 'w1'; the readiness line is looked for in o1's output.
run_background 'w1' bundle exec sidekiq -r ./examples/remote_executor.rb -q default
wait_for 5 1 grep 'dynflow: Finished performing validity checks' "$(bg_output_file o1)"

# Stop Redis, wait until o1 notices, then send SIGTERM to o1 (pid read from
# its pidfile in $TEST_PIDDIR) before Redis is started again.
stop_redis 1
wait_for 30 1 grep 'Error connecting to Redis' "$(bg_output_file o1)"
kill -15 "$(cat "$TEST_PIDDIR/o1.pid")"
start_redis

# o2 is given a longer wait (first argument 120 instead of 30) to report
# taking the lock and finishing its validity checks.
wait_for 120 1 grep 'dynflow: Acquired orchestrator lock, entering active mode.' "$(bg_output_file o2)"
wait_for 120 1 grep 'dynflow: Finished performing validity checks' "$(bg_output_file o2)"

# Run the client and require a running -> stopped plan transition in o2's output.
timeout 10 bundle exec ruby examples/remote_executor.rb client 1
wait_for 1 1 grep -P 'dynflow: ExecutionPlan.*running >>.*stopped' "$(bg_output_file o2)"
}