From a43bd84631fc6780b61c61d4f77e37341f0e4244 Mon Sep 17 00:00:00 2001 From: Gyuho Lee Date: Wed, 4 Apr 2018 13:29:24 -0700 Subject: [PATCH] functional-tester/tester: implement liveness mode failure case Signed-off-by: Gyuho Lee --- tools/functional-tester/tester/cluster.go | 15 ++++-- .../tester/cluster_tester.go | 14 +++-- tools/functional-tester/tester/failure.go | 2 +- .../tester/failure_case_failpoints.go | 3 +- .../tester/failure_case_network_blackhole.go | 8 ++- .../tester/failure_case_network_slow.go | 6 +-- ..._case_no_op.go => failure_case_no_fail.go} | 52 +++++++++++++++---- 7 files changed, 71 insertions(+), 29 deletions(-) rename tools/functional-tester/tester/{failure_case_no_op.go => failure_case_no_fail.go} (60%) diff --git a/tools/functional-tester/tester/cluster.go b/tools/functional-tester/tester/cluster.go index 5abaf5b4b..01d6d36c6 100644 --- a/tools/functional-tester/tester/cluster.go +++ b/tools/functional-tester/tester/cluster.go @@ -294,10 +294,9 @@ func (clus *Cluster) updateFailures() { } clus.failures = append(clus.failures, fpFailures...) case "NO_FAIL_WITH_STRESS": - clus.failures = append(clus.failures, newFailureNoFailWithStress()) + clus.failures = append(clus.failures, newFailureNoFailWithStress(clus)) case "NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS": - // TODO - clus.failures = append(clus.failures, newFailureNoFailWithNoStressForLiveness()) + clus.failures = append(clus.failures, newFailureNoFailWithNoStressForLiveness(clus)) case "EXTERNAL": clus.failures = append(clus.failures, newFailureExternal(clus.Tester.ExternalExecPath)) } @@ -762,4 +761,12 @@ func (clus *Cluster) defrag() error { return nil } -func (clus *Cluster) Report() int64 { return clus.stresser.ModifiedKeys() } +// GetFailureDelayDuration computes failure delay duration. +func (clus *Cluster) GetFailureDelayDuration() time.Duration { + return time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond +} + +// Report reports the number of modified keys. 
+func (clus *Cluster) Report() int64 { + return clus.stresser.ModifiedKeys() +} diff --git a/tools/functional-tester/tester/cluster_tester.go b/tools/functional-tester/tester/cluster_tester.go index e657f0dba..032d44f0b 100644 --- a/tools/functional-tester/tester/cluster_tester.go +++ b/tools/functional-tester/tester/cluster_tester.go @@ -19,6 +19,8 @@ import ( "os" "time" + "github.com/coreos/etcd/tools/functional-tester/rpcpb" + "go.uber.org/zap" ) @@ -127,8 +129,10 @@ func (clus *Cluster) doRound() error { return fmt.Errorf("wait full health error: %v", err) } - // TODO: "NO_FAIL_WITH_STRESS" - // TODO: "NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS" + if fa.FailureCase() == rpcpb.FailureCase_NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS { + clus.lg.Info("pausing stresser before injecting failures") + clus.pauseStresser() + } clus.lg.Info( "injecting failure", @@ -165,8 +169,10 @@ func (clus *Cluster) doRound() error { zap.String("desc", fa.Desc()), ) - clus.lg.Info("pausing stresser after failure recovery, before wait health") - clus.pauseStresser() + if fa.FailureCase() != rpcpb.FailureCase_NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS { + clus.lg.Info("pausing stresser after failure recovery, before wait health") + clus.pauseStresser() + } clus.lg.Info("wait health after recovering failures") if err := clus.WaitHealth(); err != nil { diff --git a/tools/functional-tester/tester/failure.go b/tools/functional-tester/tester/failure.go index b40f61245..3aec58e3e 100644 --- a/tools/functional-tester/tester/failure.go +++ b/tools/functional-tester/tester/failure.go @@ -223,7 +223,7 @@ func (f *failureUntilSnapshot) Desc() string { if f.desc.Desc() != "" { return f.desc.Desc() } - return f.failureCase.String() + " (to trigger snapshot)" + return f.failureCase.String() } func (f *failureUntilSnapshot) FailureCase() rpcpb.FailureCase { diff --git a/tools/functional-tester/tester/failure_case_failpoints.go b/tools/functional-tester/tester/failure_case_failpoints.go index f468a53c2..c55e34577
100644 --- a/tools/functional-tester/tester/failure_case_failpoints.go +++ b/tools/functional-tester/tester/failure_case_failpoints.go @@ -20,7 +20,6 @@ import ( "net/http" "strings" "sync" - "time" "github.com/coreos/etcd/tools/functional-tester/rpcpb" ) @@ -59,7 +58,7 @@ func failpointFailures(clus *Cluster) (ret []Failure, err error) { } else { fpFails[i] = &failureDelay{ Failure: fpf, - delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond, + delayDuration: clus.GetFailureDelayDuration(), } } } diff --git a/tools/functional-tester/tester/failure_case_network_blackhole.go b/tools/functional-tester/tester/failure_case_network_blackhole.go index c3a0f00c5..0f7581280 100644 --- a/tools/functional-tester/tester/failure_case_network_blackhole.go +++ b/tools/functional-tester/tester/failure_case_network_blackhole.go @@ -15,8 +15,6 @@ package tester import ( - "time" - "github.com/coreos/etcd/tools/functional-tester/rpcpb" ) @@ -37,7 +35,7 @@ func newFailureBlackholePeerPortTxRxOneFollower(clus *Cluster) Failure { f := &failureFollower{ff, -1, -1} return &failureDelay{ Failure: f, - delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond, + delayDuration: clus.GetFailureDelayDuration(), } } @@ -50,7 +48,7 @@ func newFailureBlackholePeerPortTxRxLeader(clus *Cluster) Failure { f := &failureLeader{ff, -1, -1} return &failureDelay{ Failure: f, - delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond, + delayDuration: clus.GetFailureDelayDuration(), } } @@ -62,6 +60,6 @@ func newFailureBlackholePeerPortTxRxAll(clus *Cluster) Failure { } return &failureDelay{ Failure: f, - delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond, + delayDuration: clus.GetFailureDelayDuration(), } } diff --git a/tools/functional-tester/tester/failure_case_network_slow.go b/tools/functional-tester/tester/failure_case_network_slow.go index db28206ad..274ba6383 100644 --- 
a/tools/functional-tester/tester/failure_case_network_slow.go +++ b/tools/functional-tester/tester/failure_case_network_slow.go @@ -47,7 +47,7 @@ func newFailureDelayPeerPortTxRxOneFollower(clus *Cluster) Failure { f := &failureFollower{ff, -1, -1} return &failureDelay{ Failure: f, - delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond, + delayDuration: clus.GetFailureDelayDuration(), } } @@ -60,7 +60,7 @@ func newFailureDelayPeerPortTxRxLeader(clus *Cluster) Failure { f := &failureLeader{ff, -1, -1} return &failureDelay{ Failure: f, - delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond, + delayDuration: clus.GetFailureDelayDuration(), } } @@ -72,6 +72,6 @@ func newFailureDelayPeerPortTxRxAll(clus *Cluster) Failure { } return &failureDelay{ Failure: f, - delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond, + delayDuration: clus.GetFailureDelayDuration(), } } diff --git a/tools/functional-tester/tester/failure_case_no_op.go b/tools/functional-tester/tester/failure_case_no_fail.go similarity index 60% rename from tools/functional-tester/tester/failure_case_no_op.go rename to tools/functional-tester/tester/failure_case_no_fail.go index 9a9ace462..f7b0dcad3 100644 --- a/tools/functional-tester/tester/failure_case_no_op.go +++ b/tools/functional-tester/tester/failure_case_no_fail.go @@ -18,36 +18,68 @@ import ( "time" "github.com/coreos/etcd/tools/functional-tester/rpcpb" + + "go.uber.org/zap" ) type failureNoFailWithStress failureByFunc -func (f *failureNoFailWithStress) Inject(clus *Cluster) error { return nil } -func (f *failureNoFailWithStress) Recover(clus *Cluster) error { return nil } -func (f *failureNoFailWithStress) FailureCase() rpcpb.FailureCase { return f.failureCase } +func (f *failureNoFailWithStress) Inject(clus *Cluster) error { + return nil +} -func newFailureNoFailWithStress() Failure { +func (f *failureNoFailWithStress) Recover(clus *Cluster) error { + return nil +} + +func (f 
*failureNoFailWithStress) FailureCase() rpcpb.FailureCase { + return f.failureCase +} + +func newFailureNoFailWithStress(clus *Cluster) Failure { f := &failureNoFailWithStress{ failureCase: rpcpb.FailureCase_NO_FAIL_WITH_STRESS, } return &failureDelay{ Failure: f, - delayDuration: 5 * time.Second, + delayDuration: clus.GetFailureDelayDuration(), } } type failureNoFailWithNoStressForLiveness failureByFunc -func (f *failureNoFailWithNoStressForLiveness) Inject(clus *Cluster) error { return nil } -func (f *failureNoFailWithNoStressForLiveness) Recover(clus *Cluster) error { return nil } -func (f *failureNoFailWithNoStressForLiveness) FailureCase() rpcpb.FailureCase { return f.failureCase } +func (f *failureNoFailWithNoStressForLiveness) Inject(clus *Cluster) error { + clus.lg.Info( + "extra delay for liveness mode with no stresser", + zap.Int("round", clus.rd), + zap.Int("case", clus.cs), + zap.String("desc", f.Desc()), + ) + time.Sleep(clus.GetFailureDelayDuration()) -func newFailureNoFailWithNoStressForLiveness() Failure { + clus.lg.Info( + "wait health in liveness mode", + zap.Int("round", clus.rd), + zap.Int("case", clus.cs), + zap.String("desc", f.Desc()), + ) + return clus.WaitHealth() +} + +func (f *failureNoFailWithNoStressForLiveness) Recover(clus *Cluster) error { + return nil +} + +func (f *failureNoFailWithNoStressForLiveness) FailureCase() rpcpb.FailureCase { + return f.failureCase +} + +func newFailureNoFailWithNoStressForLiveness(clus *Cluster) Failure { f := &failureNoFailWithNoStressForLiveness{ failureCase: rpcpb.FailureCase_NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS, } return &failureDelay{ Failure: f, - delayDuration: 7 * time.Second, + delayDuration: clus.GetFailureDelayDuration(), } }