functional-tester/tester: implement liveness mode failure case

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
This commit is contained in:
Gyuho Lee 2018-04-04 13:29:24 -07:00
parent 33753d4ae6
commit a43bd84631
7 changed files with 71 additions and 29 deletions

View File

@ -294,10 +294,9 @@ func (clus *Cluster) updateFailures() {
}
clus.failures = append(clus.failures, fpFailures...)
case "NO_FAIL_WITH_STRESS":
clus.failures = append(clus.failures, newFailureNoFailWithStress())
clus.failures = append(clus.failures, newFailureNoFailWithStress(clus))
case "NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS":
// TODO
clus.failures = append(clus.failures, newFailureNoFailWithNoStressForLiveness())
clus.failures = append(clus.failures, newFailureNoFailWithNoStressForLiveness(clus))
case "EXTERNAL":
clus.failures = append(clus.failures, newFailureExternal(clus.Tester.ExternalExecPath))
}
@ -762,4 +761,12 @@ func (clus *Cluster) defrag() error {
return nil
}
func (clus *Cluster) Report() int64 { return clus.stresser.ModifiedKeys() }
// GetFailureDelayDuration converts the tester's FailureDelayMs setting,
// which is expressed in milliseconds, into a time.Duration.
func (clus *Cluster) GetFailureDelayDuration() time.Duration {
	delayMs := time.Duration(clus.Tester.FailureDelayMs)
	return delayMs * time.Millisecond
}
// Report returns the modified-key count obtained from the cluster's
// stresser via ModifiedKeys.
func (clus *Cluster) Report() int64 {
	modified := clus.stresser.ModifiedKeys()
	return modified
}

View File

@ -19,6 +19,8 @@ import (
"os"
"time"
"github.com/coreos/etcd/tools/functional-tester/rpcpb"
"go.uber.org/zap"
)
@ -127,8 +129,10 @@ func (clus *Cluster) doRound() error {
return fmt.Errorf("wait full health error: %v", err)
}
// TODO: "NO_FAIL_WITH_STRESS"
// TODO: "NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS"
// The liveness case runs with no stresser, so the stresser is paused
// here, before the failure is injected, rather than after recovery.
if fa.FailureCase() == rpcpb.FailureCase_NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS {
	clus.lg.Info("pausing stresser before injecting failures")
	clus.pauseStresser()
}
clus.lg.Info(
"injecting failure",
@ -165,8 +169,10 @@ func (clus *Cluster) doRound() error {
zap.String("desc", fa.Desc()),
)
clus.lg.Info("pausing stresser after failure recovery, before wait health")
clus.pauseStresser()
if fa.FailureCase() != rpcpb.FailureCase_NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS {
clus.lg.Info("pausing stresser after failure recovery, before wait health")
clus.pauseStresser()
}
clus.lg.Info("wait health after recovering failures")
if err := clus.WaitHealth(); err != nil {

View File

@ -223,7 +223,7 @@ func (f *failureUntilSnapshot) Desc() string {
if f.desc.Desc() != "" {
return f.desc.Desc()
}
return f.failureCase.String() + " (to trigger snapshot)"
return f.failureCase.String()
}
func (f *failureUntilSnapshot) FailureCase() rpcpb.FailureCase {

View File

@ -20,7 +20,6 @@ import (
"net/http"
"strings"
"sync"
"time"
"github.com/coreos/etcd/tools/functional-tester/rpcpb"
)
@ -59,7 +58,7 @@ func failpointFailures(clus *Cluster) (ret []Failure, err error) {
} else {
fpFails[i] = &failureDelay{
Failure: fpf,
delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond,
delayDuration: clus.GetFailureDelayDuration(),
}
}
}

View File

@ -15,8 +15,6 @@
package tester
import (
"time"
"github.com/coreos/etcd/tools/functional-tester/rpcpb"
)
@ -37,7 +35,7 @@ func newFailureBlackholePeerPortTxRxOneFollower(clus *Cluster) Failure {
f := &failureFollower{ff, -1, -1}
return &failureDelay{
Failure: f,
delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond,
delayDuration: clus.GetFailureDelayDuration(),
}
}
@ -50,7 +48,7 @@ func newFailureBlackholePeerPortTxRxLeader(clus *Cluster) Failure {
f := &failureLeader{ff, -1, -1}
return &failureDelay{
Failure: f,
delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond,
delayDuration: clus.GetFailureDelayDuration(),
}
}
@ -62,6 +60,6 @@ func newFailureBlackholePeerPortTxRxAll(clus *Cluster) Failure {
}
return &failureDelay{
Failure: f,
delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond,
delayDuration: clus.GetFailureDelayDuration(),
}
}

View File

@ -47,7 +47,7 @@ func newFailureDelayPeerPortTxRxOneFollower(clus *Cluster) Failure {
f := &failureFollower{ff, -1, -1}
return &failureDelay{
Failure: f,
delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond,
delayDuration: clus.GetFailureDelayDuration(),
}
}
@ -60,7 +60,7 @@ func newFailureDelayPeerPortTxRxLeader(clus *Cluster) Failure {
f := &failureLeader{ff, -1, -1}
return &failureDelay{
Failure: f,
delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond,
delayDuration: clus.GetFailureDelayDuration(),
}
}
@ -72,6 +72,6 @@ func newFailureDelayPeerPortTxRxAll(clus *Cluster) Failure {
}
return &failureDelay{
Failure: f,
delayDuration: time.Duration(clus.Tester.FailureDelayMs) * time.Millisecond,
delayDuration: clus.GetFailureDelayDuration(),
}
}

View File

@ -18,36 +18,68 @@ import (
"time"
"github.com/coreos/etcd/tools/functional-tester/rpcpb"
"go.uber.org/zap"
)
// failureNoFailWithStress is a failure case defined on top of failureByFunc
// whose Inject and Recover methods do nothing and return nil.
type failureNoFailWithStress failureByFunc
func (f *failureNoFailWithStress) Inject(clus *Cluster) error { return nil }
func (f *failureNoFailWithStress) Recover(clus *Cluster) error { return nil }
func (f *failureNoFailWithStress) FailureCase() rpcpb.FailureCase { return f.failureCase }
// Inject is a no-op for this case: it does not touch clus and always
// returns nil.
func (f *failureNoFailWithStress) Inject(clus *Cluster) error {
return nil
}
func newFailureNoFailWithStress() Failure {
// Recover is a no-op for this case: it does not touch clus and always
// returns nil.
func (f *failureNoFailWithStress) Recover(clus *Cluster) error {
return nil
}
// FailureCase reports the rpcpb.FailureCase value stored on this failure.
func (f *failureNoFailWithStress) FailureCase() rpcpb.FailureCase {
	fc := f.failureCase
	return fc
}
// newFailureNoFailWithStress builds the NO_FAIL_WITH_STRESS case: a
// failureNoFailWithStress wrapped in a failureDelay.
//
// NOTE(review): "delayDuration" appears twice in the literal below. This
// looks like a removed diff line (the fixed 5s value) shown next to its
// replacement (clus.GetFailureDelayDuration()) — confirm against the real
// file, where only one of them can exist.
func newFailureNoFailWithStress(clus *Cluster) Failure {
f := &failureNoFailWithStress{
failureCase: rpcpb.FailureCase_NO_FAIL_WITH_STRESS,
}
return &failureDelay{
Failure: f,
delayDuration: 5 * time.Second,
delayDuration: clus.GetFailureDelayDuration(),
}
}
// failureNoFailWithNoStressForLiveness is a failure case defined on top of
// failureByFunc. Its Inject sleeps for the cluster's failure delay and then
// waits for cluster health; its Recover does nothing and returns nil.
type failureNoFailWithNoStressForLiveness failureByFunc
func (f *failureNoFailWithNoStressForLiveness) Inject(clus *Cluster) error { return nil }
func (f *failureNoFailWithNoStressForLiveness) Recover(clus *Cluster) error { return nil }
func (f *failureNoFailWithNoStressForLiveness) FailureCase() rpcpb.FailureCase { return f.failureCase }
// Inject does not inject any fault. It logs, sleeps for
// clus.GetFailureDelayDuration(), logs again, and then returns the result
// of clus.WaitHealth().
//
// NOTE(review): the bare "func newFailureNoFailWithNoStressForLiveness()"
// header in the middle of this body looks like a removed diff line that
// this view interleaved with the added ones — confirm against the real file.
func (f *failureNoFailWithNoStressForLiveness) Inject(clus *Cluster) error {
clus.lg.Info(
"extra delay for liveness mode with no stresser",
zap.Int("round", clus.rd),
zap.Int("case", clus.cs),
zap.String("desc", f.Desc()),
)
// Blocks the caller for the whole configured failure delay.
time.Sleep(clus.GetFailureDelayDuration())
func newFailureNoFailWithNoStressForLiveness() Failure {
clus.lg.Info(
"wait health in liveness mode",
zap.Int("round", clus.rd),
zap.Int("case", clus.cs),
zap.String("desc", f.Desc()),
)
// The health-check error, if any, becomes Inject's own error.
return clus.WaitHealth()
}
// Recover is a no-op for this case: it does not touch clus and always
// returns nil.
func (f *failureNoFailWithNoStressForLiveness) Recover(clus *Cluster) error {
return nil
}
// FailureCase reports the rpcpb.FailureCase value stored on this failure.
func (f *failureNoFailWithNoStressForLiveness) FailureCase() rpcpb.FailureCase {
	fc := f.failureCase
	return fc
}
// newFailureNoFailWithNoStressForLiveness builds the
// NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS case: a
// failureNoFailWithNoStressForLiveness wrapped in a failureDelay.
//
// NOTE(review): "delayDuration" appears twice in the literal below. This
// looks like a removed diff line (the fixed 7s value) shown next to its
// replacement (clus.GetFailureDelayDuration()) — confirm against the real
// file, where only one of them can exist.
func newFailureNoFailWithNoStressForLiveness(clus *Cluster) Failure {
f := &failureNoFailWithNoStressForLiveness{
failureCase: rpcpb.FailureCase_NO_FAIL_WITH_NO_STRESS_FOR_LIVENESS,
}
return &failureDelay{
Failure: f,
delayDuration: 7 * time.Second,
delayDuration: clus.GetFailureDelayDuration(),
}
}