mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
functional/tester: delay after injecting "kill" to trigger election
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
This commit is contained in:
@@ -143,69 +143,97 @@ func (clus *Cluster) updateFailures() {
|
||||
for _, cs := range clus.Tester.FailureCases {
|
||||
switch cs {
|
||||
case "KILL_ONE_FOLLOWER":
|
||||
clus.failures = append(clus.failures, newFailureKillOneFollower())
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureKillOneFollower(clus))
|
||||
case "KILL_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT":
|
||||
clus.failures = append(clus.failures, newFailureKillOneFollowerUntilTriggerSnapshot())
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureKillOneFollowerUntilTriggerSnapshot(clus))
|
||||
case "KILL_LEADER":
|
||||
clus.failures = append(clus.failures, newFailureKillLeader())
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureKillLeader(clus))
|
||||
case "KILL_LEADER_UNTIL_TRIGGER_SNAPSHOT":
|
||||
clus.failures = append(clus.failures, newFailureKillLeaderUntilTriggerSnapshot())
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureKillLeaderUntilTriggerSnapshot(clus))
|
||||
case "KILL_QUORUM":
|
||||
clus.failures = append(clus.failures, newFailureKillQuorum())
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureKillQuorum(clus))
|
||||
case "KILL_ALL":
|
||||
clus.failures = append(clus.failures, newFailureKillAll())
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureKillAll(clus))
|
||||
|
||||
case "BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER":
|
||||
clus.failures = append(clus.failures, newFailureBlackholePeerPortTxRxOneFollower(clus))
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureBlackholePeerPortTxRxOneFollower(clus))
|
||||
case "BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT":
|
||||
clus.failures = append(clus.failures, newFailureBlackholePeerPortTxRxOneFollowerUntilTriggerSnapshot())
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureBlackholePeerPortTxRxOneFollowerUntilTriggerSnapshot())
|
||||
case "BLACKHOLE_PEER_PORT_TX_RX_LEADER":
|
||||
clus.failures = append(clus.failures, newFailureBlackholePeerPortTxRxLeader(clus))
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureBlackholePeerPortTxRxLeader(clus))
|
||||
case "BLACKHOLE_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT":
|
||||
clus.failures = append(clus.failures, newFailureBlackholePeerPortTxRxLeaderUntilTriggerSnapshot())
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureBlackholePeerPortTxRxLeaderUntilTriggerSnapshot())
|
||||
case "BLACKHOLE_PEER_PORT_TX_RX_QUORUM":
|
||||
clus.failures = append(clus.failures, newFailureBlackholePeerPortTxRxQuorum(clus))
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureBlackholePeerPortTxRxQuorum(clus))
|
||||
case "BLACKHOLE_PEER_PORT_TX_RX_ALL":
|
||||
clus.failures = append(clus.failures, newFailureBlackholePeerPortTxRxAll(clus))
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureBlackholePeerPortTxRxAll(clus))
|
||||
|
||||
case "DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER":
|
||||
clus.failures = append(clus.failures, newFailureDelayPeerPortTxRxOneFollower(clus, false))
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureDelayPeerPortTxRxOneFollower(clus, false))
|
||||
case "RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER":
|
||||
clus.failures = append(clus.failures, newFailureDelayPeerPortTxRxOneFollower(clus, true))
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureDelayPeerPortTxRxOneFollower(clus, true))
|
||||
case "DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT":
|
||||
clus.failures = append(clus.failures, newFailureDelayPeerPortTxRxOneFollowerUntilTriggerSnapshot(clus, false))
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureDelayPeerPortTxRxOneFollowerUntilTriggerSnapshot(clus, false))
|
||||
case "RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT":
|
||||
clus.failures = append(clus.failures, newFailureDelayPeerPortTxRxOneFollowerUntilTriggerSnapshot(clus, true))
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureDelayPeerPortTxRxOneFollowerUntilTriggerSnapshot(clus, true))
|
||||
case "DELAY_PEER_PORT_TX_RX_LEADER":
|
||||
clus.failures = append(clus.failures, newFailureDelayPeerPortTxRxLeader(clus, false))
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureDelayPeerPortTxRxLeader(clus, false))
|
||||
case "RANDOM_DELAY_PEER_PORT_TX_RX_LEADER":
|
||||
clus.failures = append(clus.failures, newFailureDelayPeerPortTxRxLeader(clus, true))
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureDelayPeerPortTxRxLeader(clus, true))
|
||||
case "DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT":
|
||||
clus.failures = append(clus.failures, newFailureDelayPeerPortTxRxLeaderUntilTriggerSnapshot(clus, false))
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureDelayPeerPortTxRxLeaderUntilTriggerSnapshot(clus, false))
|
||||
case "RANDOM_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT":
|
||||
clus.failures = append(clus.failures, newFailureDelayPeerPortTxRxLeaderUntilTriggerSnapshot(clus, true))
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureDelayPeerPortTxRxLeaderUntilTriggerSnapshot(clus, true))
|
||||
case "DELAY_PEER_PORT_TX_RX_QUORUM":
|
||||
clus.failures = append(clus.failures, newFailureDelayPeerPortTxRxQuorum(clus, false))
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureDelayPeerPortTxRxQuorum(clus, false))
|
||||
case "RANDOM_DELAY_PEER_PORT_TX_RX_QUORUM":
|
||||
clus.failures = append(clus.failures, newFailureDelayPeerPortTxRxQuorum(clus, true))
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureDelayPeerPortTxRxQuorum(clus, true))
|
||||
case "DELAY_PEER_PORT_TX_RX_ALL":
|
||||
clus.failures = append(clus.failures, newFailureDelayPeerPortTxRxAll(clus, false))
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureDelayPeerPortTxRxAll(clus, false))
|
||||
case "RANDOM_DELAY_PEER_PORT_TX_RX_ALL":
|
||||
clus.failures = append(clus.failures, newFailureDelayPeerPortTxRxAll(clus, true))
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureDelayPeerPortTxRxAll(clus, true))
|
||||
|
||||
case "NO_FAIL_WITH_STRESS":
|
||||
clus.failures = append(clus.failures, newFailureNoFailWithStress(clus))
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureNoFailWithStress(clus))
|
||||
case "NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS":
|
||||
clus.failures = append(clus.failures, newFailureNoFailWithNoStressForLiveness(clus))
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureNoFailWithNoStressForLiveness(clus))
|
||||
|
||||
case "EXTERNAL":
|
||||
clus.failures = append(clus.failures, newFailureExternal(clus.Tester.ExternalExecPath))
|
||||
clus.failures = append(clus.failures,
|
||||
newFailureExternal(clus.Tester.ExternalExecPath))
|
||||
case "FAILPOINTS":
|
||||
fpFailures, fperr := failpointFailures(clus)
|
||||
if len(fpFailures) == 0 {
|
||||
clus.lg.Info("no failpoints found!", zap.Error(fperr))
|
||||
}
|
||||
clus.failures = append(clus.failures, fpFailures...)
|
||||
clus.failures = append(clus.failures,
|
||||
fpFailures...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,6 +40,10 @@ func read(lg *zap.Logger, fpath string) (*Cluster, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(clus.Members) < 3 {
|
||||
return nil, fmt.Errorf("len(clus.Members) expects at least 3, got %d", len(clus.Members))
|
||||
}
|
||||
|
||||
for i, mem := range clus.Members {
|
||||
if mem.BaseDir == "" {
|
||||
return nil, fmt.Errorf("BaseDir cannot be empty (got %q)", mem.BaseDir)
|
||||
|
||||
@@ -242,9 +242,6 @@ func (f *failureUntilSnapshot) Inject(clus *Cluster) error {
|
||||
if err := f.Failure.Inject(clus); err != nil {
|
||||
return err
|
||||
}
|
||||
if len(clus.Members) < 3 {
|
||||
return nil
|
||||
}
|
||||
|
||||
snapshotCount := clus.Members[0].Etcd.SnapshotCount
|
||||
|
||||
|
||||
@@ -31,9 +31,9 @@ func (f *failureDelay) Inject(clus *Cluster) error {
|
||||
}
|
||||
if f.delayDuration > 0 {
|
||||
clus.lg.Info(
|
||||
"sleeping in failureDelay",
|
||||
"wait after inject",
|
||||
zap.Duration("delay", f.delayDuration),
|
||||
zap.String("case", f.Failure.Desc()),
|
||||
zap.String("desc", f.Failure.Desc()),
|
||||
)
|
||||
time.Sleep(f.delayDuration)
|
||||
}
|
||||
|
||||
@@ -24,50 +24,66 @@ func recoverKill(clus *Cluster, idx int) error {
|
||||
return clus.sendOperation(idx, rpcpb.Operation_RestartEtcd)
|
||||
}
|
||||
|
||||
func newFailureKillOneFollower() Failure {
|
||||
func newFailureKillOneFollower(clus *Cluster) Failure {
|
||||
ff := failureByFunc{
|
||||
failureCase: rpcpb.FailureCase_KILL_ONE_FOLLOWER,
|
||||
injectMember: injectKill,
|
||||
recoverMember: recoverKill,
|
||||
}
|
||||
return &failureFollower{ff, -1, -1}
|
||||
f := &failureFollower{ff, -1, -1}
|
||||
return &failureDelay{
|
||||
Failure: f,
|
||||
delayDuration: clus.GetFailureDelayDuration(),
|
||||
}
|
||||
}
|
||||
|
||||
func newFailureKillLeader() Failure {
|
||||
func newFailureKillLeader(clus *Cluster) Failure {
|
||||
ff := failureByFunc{
|
||||
failureCase: rpcpb.FailureCase_KILL_LEADER,
|
||||
injectMember: injectKill,
|
||||
recoverMember: recoverKill,
|
||||
}
|
||||
return &failureLeader{ff, -1, -1}
|
||||
f := &failureLeader{ff, -1, -1}
|
||||
return &failureDelay{
|
||||
Failure: f,
|
||||
delayDuration: clus.GetFailureDelayDuration(),
|
||||
}
|
||||
}
|
||||
|
||||
func newFailureKillQuorum() Failure {
|
||||
return &failureQuorum{
|
||||
func newFailureKillQuorum(clus *Cluster) Failure {
|
||||
f := &failureQuorum{
|
||||
failureCase: rpcpb.FailureCase_KILL_QUORUM,
|
||||
injectMember: injectKill,
|
||||
recoverMember: recoverKill,
|
||||
}
|
||||
return &failureDelay{
|
||||
Failure: f,
|
||||
delayDuration: clus.GetFailureDelayDuration(),
|
||||
}
|
||||
}
|
||||
|
||||
func newFailureKillAll() Failure {
|
||||
return &failureAll{
|
||||
func newFailureKillAll(clus *Cluster) Failure {
|
||||
f := &failureAll{
|
||||
failureCase: rpcpb.FailureCase_KILL_ALL,
|
||||
injectMember: injectKill,
|
||||
recoverMember: recoverKill,
|
||||
}
|
||||
return &failureDelay{
|
||||
Failure: f,
|
||||
delayDuration: clus.GetFailureDelayDuration(),
|
||||
}
|
||||
}
|
||||
|
||||
func newFailureKillOneFollowerUntilTriggerSnapshot() Failure {
|
||||
func newFailureKillOneFollowerUntilTriggerSnapshot(clus *Cluster) Failure {
|
||||
return &failureUntilSnapshot{
|
||||
failureCase: rpcpb.FailureCase_KILL_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT,
|
||||
Failure: newFailureKillOneFollower(),
|
||||
Failure: newFailureKillOneFollower(clus),
|
||||
}
|
||||
}
|
||||
|
||||
func newFailureKillLeaderUntilTriggerSnapshot() Failure {
|
||||
func newFailureKillLeaderUntilTriggerSnapshot(clus *Cluster) Failure {
|
||||
return &failureUntilSnapshot{
|
||||
failureCase: rpcpb.FailureCase_KILL_LEADER_UNTIL_TRIGGER_SNAPSHOT,
|
||||
Failure: newFailureKillLeader(),
|
||||
Failure: newFailureKillLeader(clus),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,13 +52,11 @@ func newFailureDelayPeerPortTxRxOneFollower(clus *Cluster, random bool) Failure
|
||||
injectMember: injectDelayPeerPortTxRx,
|
||||
recoverMember: recoverDelayPeerPortTxRx,
|
||||
}
|
||||
|
||||
clus.Tester.UpdatedDelayLatencyMs = clus.Tester.DelayLatencyMs
|
||||
if random {
|
||||
clus.UpdateDelayLatencyMs()
|
||||
ff.failureCase = rpcpb.FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER
|
||||
}
|
||||
|
||||
f := &failureFollower{ff, -1, -1}
|
||||
return &failureDelay{
|
||||
Failure: f,
|
||||
@@ -72,13 +70,11 @@ func newFailureDelayPeerPortTxRxOneFollowerUntilTriggerSnapshot(clus *Cluster, r
|
||||
injectMember: injectDelayPeerPortTxRx,
|
||||
recoverMember: recoverDelayPeerPortTxRx,
|
||||
}
|
||||
|
||||
clus.Tester.UpdatedDelayLatencyMs = clus.Tester.DelayLatencyMs
|
||||
if random {
|
||||
clus.UpdateDelayLatencyMs()
|
||||
ff.failureCase = rpcpb.FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT
|
||||
}
|
||||
|
||||
f := &failureFollower{ff, -1, -1}
|
||||
return &failureUntilSnapshot{
|
||||
failureCase: ff.failureCase,
|
||||
@@ -92,13 +88,11 @@ func newFailureDelayPeerPortTxRxLeader(clus *Cluster, random bool) Failure {
|
||||
injectMember: injectDelayPeerPortTxRx,
|
||||
recoverMember: recoverDelayPeerPortTxRx,
|
||||
}
|
||||
|
||||
clus.Tester.UpdatedDelayLatencyMs = clus.Tester.DelayLatencyMs
|
||||
if random {
|
||||
clus.UpdateDelayLatencyMs()
|
||||
ff.failureCase = rpcpb.FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_LEADER
|
||||
}
|
||||
|
||||
f := &failureLeader{ff, -1, -1}
|
||||
return &failureDelay{
|
||||
Failure: f,
|
||||
@@ -112,13 +106,11 @@ func newFailureDelayPeerPortTxRxLeaderUntilTriggerSnapshot(clus *Cluster, random
|
||||
injectMember: injectDelayPeerPortTxRx,
|
||||
recoverMember: recoverDelayPeerPortTxRx,
|
||||
}
|
||||
|
||||
clus.Tester.UpdatedDelayLatencyMs = clus.Tester.DelayLatencyMs
|
||||
if random {
|
||||
clus.UpdateDelayLatencyMs()
|
||||
ff.failureCase = rpcpb.FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_LEADER_UNTIL_TRIGGER_SNAPSHOT
|
||||
}
|
||||
|
||||
f := &failureLeader{ff, -1, -1}
|
||||
return &failureUntilSnapshot{
|
||||
failureCase: ff.failureCase,
|
||||
@@ -132,13 +124,11 @@ func newFailureDelayPeerPortTxRxQuorum(clus *Cluster, random bool) Failure {
|
||||
injectMember: injectDelayPeerPortTxRx,
|
||||
recoverMember: recoverDelayPeerPortTxRx,
|
||||
}
|
||||
|
||||
clus.Tester.UpdatedDelayLatencyMs = clus.Tester.DelayLatencyMs
|
||||
if random {
|
||||
clus.UpdateDelayLatencyMs()
|
||||
f.failureCase = rpcpb.FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_QUORUM
|
||||
}
|
||||
|
||||
return &failureDelay{
|
||||
Failure: f,
|
||||
delayDuration: clus.GetFailureDelayDuration(),
|
||||
@@ -151,13 +141,11 @@ func newFailureDelayPeerPortTxRxAll(clus *Cluster, random bool) Failure {
|
||||
injectMember: injectDelayPeerPortTxRx,
|
||||
recoverMember: recoverDelayPeerPortTxRx,
|
||||
}
|
||||
|
||||
clus.Tester.UpdatedDelayLatencyMs = clus.Tester.DelayLatencyMs
|
||||
if random {
|
||||
clus.UpdateDelayLatencyMs()
|
||||
f.failureCase = rpcpb.FailureCase_RANDOM_DELAY_PEER_PORT_TX_RX_ALL
|
||||
}
|
||||
|
||||
return &failureDelay{
|
||||
Failure: f,
|
||||
delayDuration: clus.GetFailureDelayDuration(),
|
||||
|
||||
Reference in New Issue
Block a user