@@ -579,8 +579,13 @@ leader(info, {Status, Node, InfoList}, State0)
579
579
when Status =:= nodedown orelse
580
580
Status =:= nodeup ->
581
581
handle_node_status_change (Node , Status , InfoList , ? FUNCTION_NAME , State0 );
582
- leader (info , {update_peer , PeerId , Update }, State0 ) ->
583
- State = update_peer (PeerId , Update , State0 ),
582
+ leader (info , {unsuspend_peer , PeerId }, State0 ) ->
583
+ State = case ra_server :peer_status (PeerId , State0 # state .server_state ) of
584
+ suspended ->
585
+ update_peer (PeerId , #{status => normal }, State0 );
586
+ _ ->
587
+ State0
588
+ end ,
584
589
{keep_state , State , []};
585
590
leader (_ , tick_timeout , State0 ) ->
586
591
{State1 , RpcEffs } = make_rpcs (State0 ),
@@ -1393,13 +1398,15 @@ handle_effects(RaftState, Effects0, EvtType, State0, Actions0) ->
1393
1398
{State , lists :reverse (Actions )}.
1394
1399
1395
1400
handle_effect (_RaftState , {send_rpc , To , Rpc }, _ ,
1396
- # state {conf = Conf } = State0 , Actions ) ->
1401
+ # state {conf = Conf ,
1402
+ server_state = SS } = State0 , Actions ) ->
1397
1403
% fully qualified use only so that we can mock it for testing
1398
1404
% TODO: review / refactor to remove the mod call here
1405
+ PeerStatus = ra_server :peer_status (To , SS ),
1399
1406
case ? MODULE :send_rpc (To , Rpc , State0 ) of
1400
1407
ok ->
1401
1408
{State0 , Actions };
1402
- nosuspend ->
1409
+ nosuspend when PeerStatus == normal ->
1403
1410
% % update peer status to suspended and spawn a process
1404
1411
% % to send the rpc without nosuspend so that it will block until
1405
1412
% % the data can get through
@@ -1410,11 +1417,13 @@ handle_effect(_RaftState, {send_rpc, To, Rpc}, _,
1410
1417
% % the peer status back to normal
1411
1418
ok = gen_statem :cast (To , Rpc ),
1412
1419
incr_counter (Conf , ? C_RA_SRV_MSGS_SENT , 1 ),
1413
- Self ! {update_peer , To , #{ status => normal } }
1420
+ Self ! {unsuspend_peer , To }
1414
1421
end ),
1415
- ? DEBUG (" ~ts : temporarily suspending peer ~w due to full distribution buffer" ,
1416
- [log_id (State0 ), To ]),
1422
+ % ?DEBUG("~ts: temporarily suspending peer ~w due to full distribution buffer ~W ",
1423
+ % [log_id(State0), To, Rpc, 5 ]),
1417
1424
{update_peer (To , #{status => suspended }, State0 ), Actions };
1425
+ nosuspend ->
1426
+ {State0 , Actions };
1418
1427
noconnect ->
1419
1428
% % for noconnects just allow it to pipeline and catch up later
1420
1429
{State0 , Actions }
@@ -1976,6 +1985,8 @@ send_snapshots(Id, Term, {_, ToNode} = To, ChunkSize,
1976
1985
Result = read_chunks_and_send_rpc (RPC , To , ReadState , 1 ,
1977
1986
ChunkSize , InstallTimeout ,
1978
1987
SnapState ),
1988
+ ? DEBUG (" ~ts : sending snapshot to ~w completed" ,
1989
+ [LogId , To ]),
1979
1990
ok = gen_statem :cast (Id , {To , Result })
1980
1991
end .
1981
1992
0 commit comments