Skip to content

Commit d0a1a11

Browse files
committed
1. add ready phase to etcd raft spec
2. update trace validation to include ready phase Signed-off-by: Joshua Zhang <[email protected]>
1 parent d6c0213 commit d0a1a11

16 files changed

+1120
-5492
lines changed

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ verify-mod-tidy:
2323
verify-genproto:
2424
PASSES="genproto" ./scripts/test.sh
2525

26+
.PHONY: trace-validation
27+
trace-validation:
28+
PASSES="trace-validation" ./scripts/test.sh
29+
2630
.PHONY: test
2731
test:
2832
PASSES="unit" ./scripts/test.sh $(GO_TEST_FLAGS)

bootstrap.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,5 +76,7 @@ func (rn *RawNode) Bootstrap(peers []Peer) error {
7676
for _, peer := range peers {
7777
rn.raft.applyConfChange(pb.ConfChange{NodeID: peer.ID, Type: pb.ConfChangeAddNode}.AsV2())
7878
}
79+
80+
traceBootstrap(rn.raft)
7981
return nil
8082
}

raft.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1963,6 +1963,8 @@ func (r *raft) applyConfChange(cc pb.ConfChangeV2) pb.ConfState {
19631963
panic(err)
19641964
}
19651965

1966+
traceConfChangeEvent(cfg, r)
1967+
19661968
return r.switchToConfig(cfg, trk)
19671969
}
19681970

@@ -1973,8 +1975,6 @@ func (r *raft) applyConfChange(cc pb.ConfChangeV2) pb.ConfState {
19731975
//
19741976
// The inputs usually result from restoring a ConfState or applying a ConfChange.
19751977
func (r *raft) switchToConfig(cfg tracker.Config, trk tracker.ProgressMap) pb.ConfState {
1976-
traceConfChangeEvent(cfg, r)
1977-
19781978
r.trk.Config = cfg
19791979
r.trk.Progress = trk
19801980

rawnode.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,8 @@ func (rn *RawNode) Advance(_ Ready) {
491491
rn.stepsOnAdvance[i] = pb.Message{}
492492
}
493493
rn.stepsOnAdvance = rn.stepsOnAdvance[:0]
494+
495+
traceAdvance(rn.raft)
494496
}
495497

496498
// Status returns the current status of the given group. This allocates, see

state_trace.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ type stateMachineEventType int
3030

3131
const (
3232
rsmInitState stateMachineEventType = iota
33+
rsmBootstrap
3334
rsmBecomeCandidate
3435
rsmBecomeFollower
3536
rsmBecomeLeader
@@ -38,6 +39,7 @@ const (
3839
rsmChangeConf
3940
rsmApplyConfChange
4041
rsmReady
42+
rsmAdvance
4143
rsmSendAppendEntriesRequest
4244
rsmReceiveAppendEntriesRequest
4345
rsmSendAppendEntriesResponse
@@ -53,6 +55,7 @@ const (
5355
func (e stateMachineEventType) String() string {
5456
return []string{
5557
"InitState",
58+
"Bootstrap",
5659
"BecomeCandidate",
5760
"BecomeFollower",
5861
"BecomeLeader",
@@ -61,6 +64,7 @@ func (e stateMachineEventType) String() string {
6164
"ChangeConf",
6265
"ApplyConfChange",
6366
"Ready",
67+
"Advance",
6468
"SendAppendEntriesRequest",
6569
"ReceiveAppendEntriesRequest",
6670
"SendAppendEntriesResponse",
@@ -86,6 +90,7 @@ type TracingEvent struct {
8690
State TracingState `json:"state"`
8791
Role string `json:"role"`
8892
LogSize uint64 `json:"log"`
93+
Applied uint64 `json:"applied"`
8994
Conf [2][]string `json:"conf"`
9095
Message *TracingMessage `json:"msg,omitempty"`
9196
ConfChange *TracingConfChange `json:"cc,omitempty"`
@@ -173,6 +178,7 @@ func traceEvent(evt stateMachineEventType, r *raft, m *raftpb.Message, prop map[
173178
NodeID: strconv.FormatUint(r.id, 10),
174179
State: makeTracingState(r),
175180
LogSize: r.raftLog.lastIndex(),
181+
Applied: r.raftLog.applied,
176182
Conf: [2][]string{formatConf(r.trk.Voters[0].Slice()), formatConf(r.trk.Voters[1].Slice())},
177183
Role: r.state.String(),
178184
Message: makeTracingMessage(m),
@@ -206,10 +212,18 @@ func traceInitState(r *raft) {
206212
traceNodeEvent(rsmInitState, r)
207213
}
208214

215+
func traceBootstrap(r *raft) {
216+
traceNodeEvent(rsmBootstrap, r)
217+
}
218+
209219
func traceReady(r *raft) {
210220
traceNodeEvent(rsmReady, r)
211221
}
212222

223+
func traceAdvance(r *raft) {
224+
traceNodeEvent(rsmAdvance, r)
225+
}
226+
213227
func traceCommit(r *raft) {
214228
traceNodeEvent(rsmCommit, r)
215229
}

state_trace_nop.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,12 @@ type TracingEvent struct{}
2929

3030
func traceInitState(*raft) {}
3131

32+
func traceBootstrap(*raft) {}
33+
3234
func traceReady(*raft) {}
3335

36+
func traceAdvance(*raft) {}
37+
3438
func traceCommit(*raft) {}
3539

3640
func traceReplicate(*raft, ...raftpb.Entry) {}

tla/MCetcdraft.cfg

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,33 +12,31 @@
1212
\* See the License for the specific language governing permissions and
1313
\* limitations under the License.
1414
\*
15-
SPECIFICATION mc_etcdSpec
15+
SPECIFICATION MCSpec
1616

1717
CONSTANTS
18-
s1 = 1
19-
s2 = 2
20-
s3 = 3
21-
s4 = 4
22-
s5 = 5
18+
s1 = s1
19+
s2 = s2
20+
s3 = s3
21+
s4 = s4
22+
s5 = s5
2323

2424
InitServer = {s1, s2, s3}
2525
Server = {s1, s2, s3, s4}
2626

2727
ReconfigurationLimit = 2
28-
MaxTermLimit = 10
29-
RequestLimit = 5
30-
31-
Timeout <- MCTimeout
32-
Send <- MCSend
33-
ClientRequest <- MCClientRequest
34-
AddNewServer <- MCAddNewServer
35-
DeleteServer <- MCDeleteServer
36-
AddLearner <- MCAddLearner
37-
28+
MaxTermLimit = 3
29+
RequestLimit = 3
30+
MaxRestart = 2
31+
MaxStepDownToFollower = 1
32+
3833
InitServerVars <- etcdInitServerVars
3934
InitLogVars <- etcdInitLogVars
4035
InitConfigVars <- etcdInitConfigVars
4136

37+
IsEnabled <- MCIsEnabled
38+
PostAction <- MCPostAction
39+
4240
Nil = 0
4341

4442
ValueEntry = "ValueEntry"
@@ -65,5 +63,4 @@ INVARIANTS
6563
LogMatchingInv
6664
QuorumLogInv
6765
MoreUpToDateCorrectInv
68-
LeaderCompletenessInv
69-
CommittedIsDurableInv
66+
LeaderCompletenessInv

tla/MCetcdraft.tla

Lines changed: 80 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,39 @@
1414
\* limitations under the License.
1515
\*
1616

17-
EXTENDS etcdraft
17+
EXTENDS etcdraft_control
18+
19+
CONSTANTS s1,s2,s3,s4,s5
1820

1921
CONSTANT ReconfigurationLimit
2022
ASSUME ReconfigurationLimit \in Nat
2123

2224
CONSTANT MaxTermLimit
2325
ASSUME MaxTermLimit \in Nat
2426

27+
CONSTANT MaxRestart
28+
ASSUME MaxRestart \in Nat
29+
2530
\* Limit on client requests
2631
CONSTANT RequestLimit
2732
ASSUME RequestLimit \in Nat
2833

29-
etcd == INSTANCE etcdraft
34+
CONSTANT MaxStepDownToFollower
35+
ASSUME MaxStepDownToFollower \in Nat
36+
37+
etcd_control == INSTANCE etcdraft_control
38+
39+
VARIABLE restartCount
40+
VARIABLE stepDownCount
41+
constaintVars == <<restartCount, stepDownCount>>
42+
43+
mcVars == << vars, constaintVars >>
44+
45+
ChangeOneVar(_vars, index, op(_)) ==
46+
IF op(index) THEN
47+
UNCHANGED RemoveAt(_vars, index)
48+
ELSE
49+
UNCHANGED _vars
3050

3151
\* Application uses Node (instead of RawNode) will have multiple ConfigEntry entries appended to log in bootstrapping.
3252
BootstrapLog ==
@@ -42,79 +62,75 @@ etcdInitServerVars == /\ currentTerm = [i \in Server |-> IF i \in InitServer TH
4262
/\ votedFor = [i \in Server |-> Nil]
4363
etcdInitLogVars == /\ log = [i \in Server |-> IF i \in InitServer THEN BootstrapLog ELSE <<>>]
4464
/\ commitIndex = [i \in Server |-> IF i \in InitServer THEN Cardinality(InitServer) ELSE 0]
65+
/\ applied = [i \in Server |-> 0]
4566
etcdInitConfigVars == /\ config = [i \in Server |-> [ jointConfig |-> IF i \in InitServer THEN <<InitServer, {}>> ELSE <<{}, {}>>, learners |-> {}]]
46-
/\ reconfigCount = 0 \* the bootstrap configuraitons are not counted
67+
/\ appliedConfChange = [i \in Server |-> Len(BootstrapLog)]
4768

4869
\* This file controls the constants as seen below.
4970
\* In addition to basic settings of how many nodes are to be model checked,
5071
\* the model allows to place additional limitations on the state space of the program.
5172

52-
\* Limit the # of reconfigurations to ReconfigurationLimit
53-
MCAddNewServer(i, j) ==
54-
/\ reconfigCount < ReconfigurationLimit
55-
/\ etcd!AddNewServer(i, j)
56-
MCDeleteServer(i, j) ==
57-
/\ reconfigCount < ReconfigurationLimit
58-
/\ etcd!DeleteServer(i, j)
59-
MCAddLearner(i, j) ==
60-
/\ reconfigCount < ReconfigurationLimit
61-
/\ etcd!AddLearner(i, j)
62-
63-
\* Limit the terms that can be reached. Needs to be set to at least 3 to
64-
\* evaluate all relevant states. If set to only 2, the candidate_quorum
65-
\* constraint below is too restrictive.
66-
MCTimeout(i) ==
67-
\* Limit the term of each server to reduce state space
68-
/\ currentTerm[i] < MaxTermLimit
69-
\* Limit max number of simultaneous candidates
70-
\* We made several restrictions to the state space of Raft. However since we
71-
\* made these restrictions, Deadlocks can occur at places that Raft would in
72-
\* real-world deployments handle graciously.
73-
\* One example of this is if a Quorum of nodes becomes Candidate but can not
74-
\* timeout anymore since we constrained the terms. Then, an artificial Deadlock
75-
\* is reached. We solve this below. If TermLimit is set to any number >2, this is
76-
\* not an issue since breadth-first search will make sure that a similar
77-
\* situation is simulated at term==1 which results in a term increase to 2.
78-
/\ Cardinality({ s \in GetConfig(i) : state[s] = Candidate}) < 1
79-
/\ etcd!Timeout(i)
80-
81-
\* Limit number of requests (new entries) that can be made
82-
MCClientRequest(i, v) ==
83-
\* Allocation-free variant of Len(SelectSeq(log[i], LAMBDA e: e.contentType = TypeEntry)) < RequestLimit
84-
/\ FoldSeq(LAMBDA e, count: IF e.type = ValueEntry THEN count + 1 ELSE count, 0, log[i]) < RequestLimit
85-
/\ etcd!ClientRequest(i, v)
86-
87-
\* Limit how many identical append entries messages each node can send to another
88-
\* Limit number of duplicate messages sent to the same server
89-
MCSend(msg) ==
90-
\* One AppendEntriesRequest per node-pair at a time:
91-
\* a) No AppendEntries request from i to j.
92-
/\ ~ \E n \in DOMAIN messages \union DOMAIN pendingMessages:
93-
/\ n.mdest = msg.mdest
94-
/\ n.msource = msg.msource
95-
/\ n.mterm = msg.mterm
96-
/\ n.mtype = AppendEntriesRequest
97-
/\ msg.mtype = AppendEntriesRequest
98-
\* b) No (corresponding) AppendEntries response from j to i.
99-
/\ ~ \E n \in DOMAIN messages \union DOMAIN pendingMessages:
100-
/\ n.mdest = msg.msource
101-
/\ n.msource = msg.mdest
102-
/\ n.mterm = msg.mterm
103-
/\ n.mtype = AppendEntriesResponse
104-
/\ msg.mtype = AppendEntriesRequest
105-
/\ etcd!Send(msg)
106-
107-
mc_etcdSpec ==
108-
/\ Init
109-
/\ [][NextDynamic]_vars
110-
73+
\* Check the # of reconfigurations and return true if it is lower than ReconfigurationLimit
74+
UnderReconfigLimit ==
75+
LET leaders == {i \in Server : state[i] = Leader}
76+
IN
77+
\E i \in leaders :
78+
/\ \A j \in leaders \ {i} : Len(log[i]) >= Len(log[j])
79+
/\ ReconfigurationLimit > FoldSeq(LAMBDA x, y: IF x.type = ConfigEntry THEN 1 ELSE 0, -Cardinality(InitServer), log[i])
80+
81+
MCIsEnabled(action, args) ==
82+
CASE action \in { "AddNewServer", "DeleteServer", "AddLearner" } -> UnderReconfigLimit
83+
[] action = "Restart" -> restartCount < MaxRestart
84+
[] action = "Timeout" ->
85+
\* Limit the term of each server to reduce state space
86+
/\ currentTerm[args[1]] < MaxTermLimit
87+
\* Limit max number of simultaneous candidates
88+
\* We made several restrictions to the state space of Raft. However since we
89+
\* made these restrictions, Deadlocks can occur at places that Raft would in
90+
\* real-world deployments handle graciously.
91+
\* One example of this is if a Quorum of nodes becomes Candidate but can not
92+
\* timeout anymore since we constrained the terms. Then, an artificial Deadlock
93+
\* is reached. We solve this below. If TermLimit is set to any number >2, this is
94+
\* not an issue since breadth-first search will make sure that a similar
95+
\* situation is simulated at term==1 which results in a term increase to 2.
96+
/\ Cardinality({ s \in GetConfig(args[1]) : state[s] = Candidate}) < 1
97+
[] action = "ClientRequest" ->
98+
\* Allocation-free variant of Len(SelectSeq(log[i], LAMBDA e: e.contentType = TypeEntry)) < RequestLimit
99+
FoldSeq(LAMBDA e, count: IF e.type = ValueEntry THEN count + 1 ELSE count, 0, log[args[1]]) < RequestLimit
100+
[] action = "Ready" ->
101+
\* If there is no new message and no state change, we can skip Advance to reduce state spece
102+
\* to be explored
103+
LET rd == ReadyData(args[1])
104+
IN
105+
\/ rd.msgs /= EmptyBag
106+
\/ DurableStateFromReady(rd) /= durableState[args[1]]
107+
[] action = "StepDownToFollower" -> stepDownCount < MaxStepDownToFollower
108+
[] OTHER -> etcd_control!IsEnabled(action, args)
109+
110+
MCPostAction(action, args) ==
111+
CASE action = "Restart" ->
112+
/\ restartCount' = restartCount + 1
113+
/\ UNCHANGED <<stepDownCount>>
114+
[] action = "StepDownToFollower" ->
115+
/\ stepDownCount' = stepDownCount + 1
116+
/\ UNCHANGED <<restartCount>>
117+
[] OTHER -> UNCHANGED <<constaintVars>>
111118
\* Symmetry set over possible servers. May dangerous and is only enabled
112119
\* via the Symmetry option in cfg file.
113120
Symmetry == Permutations(Server)
114121

115122
\* Include all variables in the view, which is similar to defining no view.
116-
View == << vars >>
123+
View == mcVars
117124

118-
----
125+
InitConstraintVars ==
126+
/\ restartCount = 0
127+
/\ stepDownCount = 0
128+
129+
MCInit ==
130+
/\ Init
131+
/\ InitConstraintVars
132+
133+
MCSpec == MCInit /\ [][Controlled_Next]_mcVars
119134

135+
----
120136
===================================

tla/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ The TLA+ spec defines the desired behaviors of the model. To validate the correc
7272
./validate-model.sh -s ./MCetcdraft.tla -c ./MCetcdraft.cfg
7373
```
7474

75+
You can also add `-m` option to run model checking in simulation mode, which will randomly walk in the state machine and is helpful for finding issues quickly.
76+
77+
```console
78+
./validate-model.sh -m -s ./MCetcdraft.tla -c ./MCetcdraft.cfg
79+
```
7580

7681
## Validate Collected Traces
7782
With above example trace logger, validate.sh can be used to validate traces parallelly.

0 commit comments

Comments
 (0)