Skip to content

Commit 2299701

Browse files
committed
Do not reset the electionElapsed if the node doesn't grant vote
If the local node receives a MsgVote message with higher term but it doesn't grant the vote; it turns into a follower, but it shouldn't reset the electionElapsed, to ensure it has higher priority to start a campaign in the next round of election. If we reject a node, it's highly likely we will reject it again if it immediately campaigns again. So it may waste a long time to elect a leader if we reset the electionElapsed. Signed-off-by: Benjamin Wang <[email protected]>
1 parent d475d7e commit 2299701

File tree

2 files changed

+57
-0
lines changed

2 files changed

+57
-0
lines changed

raft.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1058,12 +1058,24 @@ func (r *raft) poll(id uint64, t pb.MessageType, v bool) (granted int, rejected
10581058
}
10591059

10601060
func (r *raft) Step(m pb.Message) error {
1061+
var (
1062+
bakElectionElapsed int
1063+
bakRandomizedElectionTimeout int
1064+
higherTerm bool
1065+
)
1066+
10611067
// Handle the message term, which may result in our stepping down to a follower.
10621068
switch {
10631069
case m.Term == 0:
10641070
// local message
10651071
case m.Term > r.Term:
1072+
higherTerm = true
1073+
10661074
if m.Type == pb.MsgVote || m.Type == pb.MsgPreVote {
1075+
// backup the electionElapsed and randomizedElectionTimeout, we
1076+
// will restore the values if we don't grant the Vote.
1077+
bakElectionElapsed, bakRandomizedElectionTimeout = r.electionElapsed, r.randomizedElectionTimeout
1078+
10671079
force := bytes.Equal(m.Context, []byte(campaignTransfer))
10681080
inLease := r.checkQuorum && r.lead != None && r.electionElapsed < r.electionTimeout
10691081
if !force && inLease {
@@ -1221,6 +1233,18 @@ func (r *raft) Step(m pb.Message) error {
12211233
r.Vote = m.From
12221234
}
12231235
} else {
1236+
// If the local node receives a `MsgVote` message with higher
1237+
// term, but it doesn't grant the vote; it turns into a follower,
1238+
// but it shouldn't reset the electionElapsed, to ensure it
1239+
// has higher priority to start a campaign in the next round
1240+
// of election. If we reject a node, it's highly likely we
1241+
// will reject it again if it immediately campaigns again.
1242+
// So it may waste a long time to elect a leader if we reset
1243+
// the electionElapsed.
1244+
if higherTerm && m.Type == pb.MsgVote {
1245+
r.electionElapsed = bakElectionElapsed
1246+
r.randomizedElectionTimeout = bakRandomizedElectionTimeout
1247+
}
12241248
r.logger.Infof("%x [logterm: %d, index: %d, vote: %x] rejected %s from %x [logterm: %d, index: %d] at term %d",
12251249
r.id, lastID.term, lastID.index, r.Vote, m.Type, m.From, candLastID.term, candLastID.index, r.Term)
12261250
r.send(pb.Message{To: m.From, Term: r.Term, Type: voteRespMsgType(m.Type), Reject: true})

raft_test.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1241,6 +1241,39 @@ func TestPastElectionTimeout(t *testing.T) {
12411241
}
12421242
}
12431243

1244+
func TestElectionElapsedOnRejectVote(t *testing.T) {
1245+
testCases := []struct {
1246+
electionElapsed int
1247+
randomizedElectionTimeout int
1248+
}{
1249+
{18, 28},
1250+
{12, 30},
1251+
{7, 15},
1252+
}
1253+
1254+
for i, tc := range testCases {
1255+
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
1256+
storage := newTestMemoryStorage(withPeers(1, 2))
1257+
storage.Append(index(1).terms(1, 2, 3, 4, 5))
1258+
r := newTestRaft(1, 10, 1, storage)
1259+
1260+
term, index := r.raftLog.lastEntryID().term, r.raftLog.lastEntryID().index
1261+
r.Term = term
1262+
1263+
r.electionElapsed = tc.electionElapsed
1264+
r.randomizedElectionTimeout = tc.randomizedElectionTimeout
1265+
1266+
// ensure the MsgVote message has a higher term, but with stale data,
1267+
// so that the vote will be rejected.
1268+
err := r.Step(pb.Message{From: 2, To: 1, Term: term + 1, Type: pb.MsgVote, LogTerm: index - 1, Index: index - 1})
1269+
require.NoError(t, err)
1270+
1271+
require.Equal(t, tc.electionElapsed, r.electionElapsed)
1272+
require.Equal(t, tc.randomizedElectionTimeout, r.randomizedElectionTimeout)
1273+
})
1274+
}
1275+
}
1276+
12441277
// TestStepIgnoreOldTermMsg to ensure that the Step function ignores the message
12451278
// from old term and does not pass it to the actual stepX function.
12461279
func TestStepIgnoreOldTermMsg(t *testing.T) {

0 commit comments

Comments
 (0)