Skip to content

Commit bc285dd

Browse files
authored
Merge pull request #171 from pav-kv/track-inflight-commit
tracker: track in-flight commit index
2 parents ed26e90 + 0f9fe52 commit bc285dd

11 files changed

+63
-159
lines changed

raft.go

+16-10
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,10 @@ func (r *raft) sendAppend(to uint64) {
599599
// argument controls whether messages with no entries will be sent
600600
// ("empty" messages are useful to convey updated Commit indexes, but
601601
// are undesirable when we're sending multiple messages in a batch).
602+
//
603+
// TODO(pav-kv): make invocation of maybeSendAppend stateless. The Progress
604+
// struct contains all the state necessary for deciding whether to send a
605+
// message.
602606
func (r *raft) maybeSendAppend(to uint64, sendIfEmpty bool) bool {
603607
pr := r.trk.Progress[to]
604608
if pr.IsPaused() {
@@ -640,7 +644,8 @@ func (r *raft) maybeSendAppend(to uint64, sendIfEmpty bool) bool {
640644
Entries: ents,
641645
Commit: r.raftLog.committed,
642646
})
643-
pr.UpdateOnEntriesSend(len(ents), uint64(payloadsSize(ents)))
647+
pr.SentEntries(len(ents), uint64(payloadsSize(ents)))
648+
pr.SentCommit(r.raftLog.committed)
644649
return true
645650
}
646651

@@ -675,21 +680,21 @@ func (r *raft) maybeSendSnapshot(to uint64, pr *tracker.Progress) bool {
675680

676681
// sendHeartbeat sends a heartbeat RPC to the given peer.
677682
func (r *raft) sendHeartbeat(to uint64, ctx []byte) {
683+
pr := r.trk.Progress[to]
678684
// Attach the commit as min(to.matched, r.committed).
679685
// When the leader sends out heartbeat message,
680686
// the receiver(follower) might not be matched with the leader
681687
// or it might not have all the committed entries.
682688
// The leader MUST NOT forward the follower's commit to
683689
// an unmatched index.
684-
commit := min(r.trk.Progress[to].Match, r.raftLog.committed)
685-
m := pb.Message{
690+
commit := min(pr.Match, r.raftLog.committed)
691+
r.send(pb.Message{
686692
To: to,
687693
Type: pb.MsgHeartbeat,
688694
Commit: commit,
689695
Context: ctx,
690-
}
691-
692-
r.send(m)
696+
})
697+
pr.SentCommit(commit)
693698
}
694699

695700
// bcastAppend sends RPC, with entries to all peers that are not up-to-date
@@ -1480,7 +1485,6 @@ func stepLeader(r *raft, m pb.Message) error {
14801485
r.sendAppend(m.From)
14811486
}
14821487
} else {
1483-
oldPaused := pr.IsPaused()
14841488
// We want to update our tracking if the response updates our
14851489
// matched index or if the response can move a probing peer back
14861490
// into StateReplicate (see heartbeat_rep_recovers_from_probing.txt
@@ -1517,9 +1521,11 @@ func stepLeader(r *raft, m pb.Message) error {
15171521
// to respond to pending read index requests
15181522
releasePendingReadIndexMessages(r)
15191523
r.bcastAppend()
1520-
} else if oldPaused {
1521-
// If we were paused before, this node may be missing the
1522-
// latest commit index, so send it.
1524+
} else if r.id != m.From && pr.CanBumpCommit(r.raftLog.committed) {
1525+
// This node may be missing the latest commit index, so send it.
1526+
// NB: this is not strictly necessary because the periodic heartbeat
1527+
// messages deliver commit indices too. However, a message sent now
1528+
// may arrive earlier than the next heartbeat fires.
15231529
r.sendAppend(m.From)
15241530
}
15251531
// We've updated flow control information above, which may

testdata/confchange_v1_add_single.txt

+1-13
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ stabilize
7272
DEBUG 1 received MsgAppResp(rejected, hint: (index 0, term 0)) from 2 for index 3
7373
DEBUG 1 decreased progress of 2 to [StateProbe match=0 next=1]
7474
DEBUG 1 [firstindex: 3, commit: 4] sent snapshot[index: 4, term: 1] to 2 [StateProbe match=0 next=1]
75-
DEBUG 1 paused sending replication messages to 2 [StateSnapshot match=0 next=1 paused pendingSnap=4]
75+
DEBUG 1 paused sending replication messages to 2 [StateSnapshot match=0 next=5 paused pendingSnap=4]
7676
> 1 handling Ready
7777
Ready MustSync=false:
7878
Messages:
@@ -94,15 +94,3 @@ stabilize
9494
> 1 receiving messages
9595
2->1 MsgAppResp Term:1 Log:0/4
9696
DEBUG 1 recovered from needing snapshot, resumed sending replication messages to 2 [StateSnapshot match=4 next=5 paused pendingSnap=4]
97-
> 1 handling Ready
98-
Ready MustSync=false:
99-
Messages:
100-
1->2 MsgApp Term:1 Log:1/4 Commit:4
101-
> 2 receiving messages
102-
1->2 MsgApp Term:1 Log:1/4 Commit:4
103-
> 2 handling Ready
104-
Ready MustSync=false:
105-
Messages:
106-
2->1 MsgAppResp Term:1 Log:0/4
107-
> 1 receiving messages
108-
2->1 MsgAppResp Term:1 Log:0/4

testdata/confchange_v2_add_double_auto.txt

+2-14
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ stabilize 1 2
9595
DEBUG 1 received MsgAppResp(rejected, hint: (index 0, term 0)) from 2 for index 3
9696
DEBUG 1 decreased progress of 2 to [StateProbe match=0 next=1]
9797
DEBUG 1 [firstindex: 3, commit: 4] sent snapshot[index: 4, term: 1] to 2 [StateProbe match=0 next=1]
98-
DEBUG 1 paused sending replication messages to 2 [StateSnapshot match=0 next=1 paused pendingSnap=4]
98+
DEBUG 1 paused sending replication messages to 2 [StateSnapshot match=0 next=5 paused pendingSnap=4]
9999
> 1 handling Ready
100100
Ready MustSync=false:
101101
Messages:
@@ -171,7 +171,7 @@ stabilize 1 3
171171
DEBUG 1 received MsgAppResp(rejected, hint: (index 0, term 0)) from 3 for index 3
172172
DEBUG 1 decreased progress of 3 to [StateProbe match=0 next=1]
173173
DEBUG 1 [firstindex: 3, commit: 5] sent snapshot[index: 5, term: 1] to 3 [StateProbe match=0 next=1]
174-
DEBUG 1 paused sending replication messages to 3 [StateSnapshot match=0 next=1 paused pendingSnap=5]
174+
DEBUG 1 paused sending replication messages to 3 [StateSnapshot match=0 next=6 paused pendingSnap=5]
175175
> 1 handling Ready
176176
Ready MustSync=false:
177177
Messages:
@@ -193,18 +193,6 @@ stabilize 1 3
193193
> 1 receiving messages
194194
3->1 MsgAppResp Term:1 Log:0/5
195195
DEBUG 1 recovered from needing snapshot, resumed sending replication messages to 3 [StateSnapshot match=5 next=6 paused pendingSnap=5]
196-
> 1 handling Ready
197-
Ready MustSync=false:
198-
Messages:
199-
1->3 MsgApp Term:1 Log:1/5 Commit:5
200-
> 3 receiving messages
201-
1->3 MsgApp Term:1 Log:1/5 Commit:5
202-
> 3 handling Ready
203-
Ready MustSync=false:
204-
Messages:
205-
3->1 MsgAppResp Term:1 Log:0/5
206-
> 1 receiving messages
207-
3->1 MsgAppResp Term:1 Log:0/5
208196

209197
# Nothing else happens.
210198
stabilize

testdata/confchange_v2_add_double_implicit.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ stabilize 1 2
7878
DEBUG 1 received MsgAppResp(rejected, hint: (index 0, term 0)) from 2 for index 3
7979
DEBUG 1 decreased progress of 2 to [StateProbe match=0 next=1]
8080
DEBUG 1 [firstindex: 3, commit: 4] sent snapshot[index: 4, term: 1] to 2 [StateProbe match=0 next=1]
81-
DEBUG 1 paused sending replication messages to 2 [StateSnapshot match=0 next=1 paused pendingSnap=4]
81+
DEBUG 1 paused sending replication messages to 2 [StateSnapshot match=0 next=5 paused pendingSnap=4]
8282
> 1 handling Ready
8383
Ready MustSync=false:
8484
Messages:

testdata/confchange_v2_add_single_auto.txt

+1-13
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ stabilize
7373
DEBUG 1 received MsgAppResp(rejected, hint: (index 0, term 0)) from 2 for index 3
7474
DEBUG 1 decreased progress of 2 to [StateProbe match=0 next=1]
7575
DEBUG 1 [firstindex: 3, commit: 4] sent snapshot[index: 4, term: 1] to 2 [StateProbe match=0 next=1]
76-
DEBUG 1 paused sending replication messages to 2 [StateSnapshot match=0 next=1 paused pendingSnap=4]
76+
DEBUG 1 paused sending replication messages to 2 [StateSnapshot match=0 next=5 paused pendingSnap=4]
7777
> 1 handling Ready
7878
Ready MustSync=false:
7979
Messages:
@@ -95,15 +95,3 @@ stabilize
9595
> 1 receiving messages
9696
2->1 MsgAppResp Term:1 Log:0/4
9797
DEBUG 1 recovered from needing snapshot, resumed sending replication messages to 2 [StateSnapshot match=4 next=5 paused pendingSnap=4]
98-
> 1 handling Ready
99-
Ready MustSync=false:
100-
Messages:
101-
1->2 MsgApp Term:1 Log:1/4 Commit:4
102-
> 2 receiving messages
103-
1->2 MsgApp Term:1 Log:1/4 Commit:4
104-
> 2 handling Ready
105-
Ready MustSync=false:
106-
Messages:
107-
2->1 MsgAppResp Term:1 Log:0/4
108-
> 1 receiving messages
109-
2->1 MsgAppResp Term:1 Log:0/4

testdata/confchange_v2_add_single_explicit.txt

+1-13
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ stabilize 1 2
7373
DEBUG 1 received MsgAppResp(rejected, hint: (index 0, term 0)) from 2 for index 3
7474
DEBUG 1 decreased progress of 2 to [StateProbe match=0 next=1]
7575
DEBUG 1 [firstindex: 3, commit: 4] sent snapshot[index: 4, term: 1] to 2 [StateProbe match=0 next=1]
76-
DEBUG 1 paused sending replication messages to 2 [StateSnapshot match=0 next=1 paused pendingSnap=4]
76+
DEBUG 1 paused sending replication messages to 2 [StateSnapshot match=0 next=5 paused pendingSnap=4]
7777
> 1 handling Ready
7878
Ready MustSync=false:
7979
Messages:
@@ -95,18 +95,6 @@ stabilize 1 2
9595
> 1 receiving messages
9696
2->1 MsgAppResp Term:1 Log:0/4
9797
DEBUG 1 recovered from needing snapshot, resumed sending replication messages to 2 [StateSnapshot match=4 next=5 paused pendingSnap=4]
98-
> 1 handling Ready
99-
Ready MustSync=false:
100-
Messages:
101-
1->2 MsgApp Term:1 Log:1/4 Commit:4
102-
> 2 receiving messages
103-
1->2 MsgApp Term:1 Log:1/4 Commit:4
104-
> 2 handling Ready
105-
Ready MustSync=false:
106-
Messages:
107-
2->1 MsgAppResp Term:1 Log:0/4
108-
> 1 receiving messages
109-
2->1 MsgAppResp Term:1 Log:0/4
11098

11199
# Check that we're not allowed to change membership again while in the joint state.
112100
# This leads to an empty entry being proposed instead (index 5 in the stabilize block

testdata/confchange_v2_replace_leader.txt

-16
Original file line numberDiff line numberDiff line change
@@ -143,18 +143,6 @@ stabilize
143143
4->1 MsgAppResp Term:1 Log:0/4
144144
> 1 receiving messages
145145
4->1 MsgAppResp Term:1 Log:0/4
146-
> 1 handling Ready
147-
Ready MustSync=false:
148-
Messages:
149-
1->4 MsgApp Term:1 Log:1/4 Commit:4
150-
> 4 receiving messages
151-
1->4 MsgApp Term:1 Log:1/4 Commit:4
152-
> 4 handling Ready
153-
Ready MustSync=false:
154-
Messages:
155-
4->1 MsgAppResp Term:1 Log:0/4
156-
> 1 receiving messages
157-
4->1 MsgAppResp Term:1 Log:0/4
158146

159147

160148
# Transfer leadership while in the joint config.
@@ -284,12 +272,10 @@ stabilize
284272
CommittedEntries:
285273
2/5 EntryNormal ""
286274
Messages:
287-
4->1 MsgApp Term:2 Log:2/5 Commit:4
288275
4->1 MsgApp Term:2 Log:2/5 Commit:5
289276
4->2 MsgApp Term:2 Log:2/5 Commit:5
290277
4->3 MsgApp Term:2 Log:2/5 Commit:5
291278
> 1 receiving messages
292-
4->1 MsgApp Term:2 Log:2/5 Commit:4
293279
4->1 MsgApp Term:2 Log:2/5 Commit:5
294280
> 2 receiving messages
295281
4->2 MsgApp Term:2 Log:2/5 Commit:5
@@ -302,7 +288,6 @@ stabilize
302288
2/5 EntryNormal ""
303289
Messages:
304290
1->4 MsgAppResp Term:2 Log:0/5
305-
1->4 MsgAppResp Term:2 Log:0/5
306291
> 2 handling Ready
307292
Ready MustSync=false:
308293
HardState Term:2 Vote:4 Commit:5
@@ -318,7 +303,6 @@ stabilize
318303
Messages:
319304
3->4 MsgAppResp Term:2 Log:0/5
320305
> 4 receiving messages
321-
1->4 MsgAppResp Term:2 Log:0/5
322306
1->4 MsgAppResp Term:2 Log:0/5
323307
2->4 MsgAppResp Term:2 Log:0/5
324308
3->4 MsgAppResp Term:2 Log:0/5

testdata/probe_and_replicate.txt

-60
Original file line numberDiff line numberDiff line change
@@ -513,18 +513,6 @@ stabilize 1 2
513513
2->1 MsgAppResp Term:8 Log:0/21
514514
> 1 receiving messages
515515
2->1 MsgAppResp Term:8 Log:0/21
516-
> 1 handling Ready
517-
Ready MustSync=false:
518-
Messages:
519-
1->2 MsgApp Term:8 Log:8/21 Commit:18
520-
> 2 receiving messages
521-
1->2 MsgApp Term:8 Log:8/21 Commit:18
522-
> 2 handling Ready
523-
Ready MustSync=false:
524-
Messages:
525-
2->1 MsgAppResp Term:8 Log:0/21
526-
> 1 receiving messages
527-
2->1 MsgAppResp Term:8 Log:0/21
528516

529517
stabilize 1 3
530518
----
@@ -579,18 +567,6 @@ stabilize 1 3
579567
3->1 MsgAppResp Term:8 Log:0/21
580568
> 1 receiving messages
581569
3->1 MsgAppResp Term:8 Log:0/21
582-
> 1 handling Ready
583-
Ready MustSync=false:
584-
Messages:
585-
1->3 MsgApp Term:8 Log:8/21 Commit:18
586-
> 3 receiving messages
587-
1->3 MsgApp Term:8 Log:8/21 Commit:18
588-
> 3 handling Ready
589-
Ready MustSync=false:
590-
Messages:
591-
3->1 MsgAppResp Term:8 Log:0/21
592-
> 1 receiving messages
593-
3->1 MsgAppResp Term:8 Log:0/21
594570

595571
stabilize 1 4
596572
----
@@ -674,18 +650,6 @@ stabilize 1 5
674650
5->1 MsgAppResp Term:8 Log:0/21
675651
> 1 receiving messages
676652
5->1 MsgAppResp Term:8 Log:0/21
677-
> 1 handling Ready
678-
Ready MustSync=false:
679-
Messages:
680-
1->5 MsgApp Term:8 Log:8/21 Commit:21
681-
> 5 receiving messages
682-
1->5 MsgApp Term:8 Log:8/21 Commit:21
683-
> 5 handling Ready
684-
Ready MustSync=false:
685-
Messages:
686-
5->1 MsgAppResp Term:8 Log:0/21
687-
> 1 receiving messages
688-
5->1 MsgAppResp Term:8 Log:0/21
689653

690654
stabilize 1 6
691655
----
@@ -741,18 +705,6 @@ stabilize 1 6
741705
6->1 MsgAppResp Term:8 Log:0/21
742706
> 1 receiving messages
743707
6->1 MsgAppResp Term:8 Log:0/21
744-
> 1 handling Ready
745-
Ready MustSync=false:
746-
Messages:
747-
1->6 MsgApp Term:8 Log:8/21 Commit:21
748-
> 6 receiving messages
749-
1->6 MsgApp Term:8 Log:8/21 Commit:21
750-
> 6 handling Ready
751-
Ready MustSync=false:
752-
Messages:
753-
6->1 MsgAppResp Term:8 Log:0/21
754-
> 1 receiving messages
755-
6->1 MsgAppResp Term:8 Log:0/21
756708

757709
stabilize 1 7
758710
----
@@ -816,15 +768,3 @@ stabilize 1 7
816768
7->1 MsgAppResp Term:8 Log:0/21
817769
> 1 receiving messages
818770
7->1 MsgAppResp Term:8 Log:0/21
819-
> 1 handling Ready
820-
Ready MustSync=false:
821-
Messages:
822-
1->7 MsgApp Term:8 Log:8/21 Commit:21
823-
> 7 receiving messages
824-
1->7 MsgApp Term:8 Log:8/21 Commit:21
825-
> 7 handling Ready
826-
Ready MustSync=false:
827-
Messages:
828-
7->1 MsgAppResp Term:8 Log:0/21
829-
> 1 receiving messages
830-
7->1 MsgAppResp Term:8 Log:0/21

testdata/snapshot_succeed_via_app_resp.txt

+2-13
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ stabilize 1
8787
> 1 receiving messages
8888
3->1 MsgHeartbeatResp Term:1 Log:0/0
8989
DEBUG 1 [firstindex: 12, commit: 11] sent snapshot[index: 11, term: 1] to 3 [StateProbe match=0 next=11]
90-
DEBUG 1 paused sending replication messages to 3 [StateSnapshot match=0 next=11 paused pendingSnap=11]
90+
DEBUG 1 paused sending replication messages to 3 [StateSnapshot match=0 next=12 paused pendingSnap=11]
9191
> 1 handling Ready
9292
Ready MustSync=false:
9393
Messages:
@@ -98,7 +98,7 @@ status 1
9898
----
9999
1: StateReplicate match=11 next=12
100100
2: StateReplicate match=11 next=12
101-
3: StateSnapshot match=0 next=11 paused pendingSnap=11
101+
3: StateSnapshot match=0 next=12 paused pendingSnap=11
102102

103103
# Follower applies the snapshot. Note how it reacts with a MsgAppResp upon completion.
104104
# The snapshot fully catches the follower up (i.e. there are no more log entries it
@@ -127,10 +127,6 @@ stabilize 1
127127
> 1 receiving messages
128128
3->1 MsgAppResp Term:1 Log:0/11
129129
DEBUG 1 recovered from needing snapshot, resumed sending replication messages to 3 [StateSnapshot match=11 next=12 paused pendingSnap=11]
130-
> 1 handling Ready
131-
Ready MustSync=false:
132-
Messages:
133-
1->3 MsgApp Term:1 Log:1/11 Commit:11
134130

135131
status 1
136132
----
@@ -143,16 +139,9 @@ stabilize
143139
----
144140
> 2 receiving messages
145141
1->2 MsgHeartbeat Term:1 Log:0/0 Commit:11
146-
> 3 receiving messages
147-
1->3 MsgApp Term:1 Log:1/11 Commit:11
148142
> 2 handling Ready
149143
Ready MustSync=false:
150144
Messages:
151145
2->1 MsgHeartbeatResp Term:1 Log:0/0
152-
> 3 handling Ready
153-
Ready MustSync=false:
154-
Messages:
155-
3->1 MsgAppResp Term:1 Log:0/11
156146
> 1 receiving messages
157147
2->1 MsgHeartbeatResp Term:1 Log:0/0
158-
3->1 MsgAppResp Term:1 Log:0/11

testdata/snapshot_succeed_via_app_resp_behind.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ stabilize 1
124124
DEBUG 1 received MsgAppResp(rejected, hint: (index 5, term 1)) from 3 for index 10
125125
DEBUG 1 decreased progress of 3 to [StateProbe match=0 next=6]
126126
DEBUG 1 [firstindex: 11, commit: 12] sent snapshot[index: 12, term: 1] to 3 [StateProbe match=0 next=6]
127-
DEBUG 1 paused sending replication messages to 3 [StateSnapshot match=0 next=6 paused pendingSnap=12]
127+
DEBUG 1 paused sending replication messages to 3 [StateSnapshot match=0 next=13 paused pendingSnap=12]
128128
> 1 handling Ready
129129
Ready MustSync=false:
130130
Messages:
@@ -152,7 +152,7 @@ stabilize 1
152152
----
153153
> 1 receiving messages
154154
3->1 MsgAppResp Term:1 Log:0/11
155-
DEBUG 1 recovered from needing snapshot, resumed sending replication messages to 3 [StateSnapshot match=11 next=12 paused pendingSnap=12]
155+
DEBUG 1 recovered from needing snapshot, resumed sending replication messages to 3 [StateSnapshot match=11 next=13 paused pendingSnap=12]
156156
> 1 handling Ready
157157
Ready MustSync=false:
158158
Messages:

0 commit comments

Comments
 (0)