tests/robustness: Retry injecting failpoint
Signed-off-by: Marek Siarkowicz <siarkowicz@google.com>
commit 43b2477c28
parent 90cbadc660
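The change replaces the RandomFailpoint pseudo-failpoint with a plain allFailpoints list plus pickRandomFailpoint/validateFailpoint helpers, and reworks injectFailpoints so that a failed injection is retried instead of failing the test outright. To illustrate the retry budget, here is a minimal self-contained Go sketch; the constant names and the loop condition come from the diff below, while the constant values, the inject stub, and the success/failure accounting are assumptions rather than the literal test code:

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// Placeholder values; the real constants are defined elsewhere in the test suite.
const (
	failpointInjectionsCount     = 1
	failpointInjectionsRetries   = 3
	waitBetweenFailpointTriggers = time.Second
	triggerTimeout               = time.Minute
)

// inject stands in for a single injection attempt; in the real tests this sets
// up a gofail failpoint on a member and waits for the expected effect.
func inject(ctx context.Context) error {
	return errors.New("hypothetical transient failure")
}

func injectWithRetry(ctx context.Context) error {
	ctx, cancel := context.WithTimeout(ctx, triggerTimeout)
	defer cancel()

	successes, failures := 0, 0
	for successes < failpointInjectionsCount && failures < failpointInjectionsRetries {
		time.Sleep(waitBetweenFailpointTriggers)
		if err := inject(ctx); err != nil {
			failures++
			continue
		}
		successes++
	}
	if successes < failpointInjectionsCount {
		return fmt.Errorf("failed to trigger failpoint, failures: %d", failures)
	}
	return nil
}

func main() {
	if err := injectWithRetry(context.Background()); err != nil {
		fmt.Println(err)
	}
}

The intent, as the loop condition suggests, is that a single flaky injection no longer aborts the whole robustness run; only repeated failures do.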
@@ -67,7 +67,7 @@ var (
 	RaftAfterWALReleasePanic Failpoint = goPanicFailpoint{"raftAfterWALRelease", triggerBlackhole{waitTillSnapshot: true}, Follower}
 	RaftBeforeSaveSnapPanic Failpoint = goPanicFailpoint{"raftBeforeSaveSnap", triggerBlackhole{waitTillSnapshot: true}, Follower}
 	RaftAfterSaveSnapPanic Failpoint = goPanicFailpoint{"raftAfterSaveSnap", triggerBlackhole{waitTillSnapshot: true}, Follower}
-	RandomFailpoint Failpoint = randomFailpoint{[]Failpoint{
+	allFailpoints = []Failpoint{
 		KillFailpoint, BeforeCommitPanic, AfterCommitPanic, RaftBeforeSavePanic, RaftAfterSavePanic,
 		DefragBeforeCopyPanic, DefragBeforeRenamePanic, BackendBeforePreCommitHookPanic, BackendAfterPreCommitHookPanic,
 		BackendBeforeStartDBTxnPanic, BackendAfterStartDBTxnPanic, BackendBeforeWritebackBufPanic,
@@ -76,22 +76,40 @@ var (
 		CompactAfterCommitBatchPanic, RaftBeforeLeaderSendPanic, BlackholePeerNetwork, DelayPeerNetwork,
 		RaftBeforeFollowerSendPanic, RaftBeforeApplySnapPanic, RaftAfterApplySnapPanic, RaftAfterWALReleasePanic,
 		RaftBeforeSaveSnapPanic, RaftAfterSaveSnapPanic, BlackholeUntilSnapshot,
-	}}
+	}
 )
 
+func pickRandomFailpoint(t *testing.T, clus *e2e.EtcdProcessCluster) Failpoint {
+	availableFailpoints := make([]Failpoint, 0, len(allFailpoints))
+	for _, failpoint := range allFailpoints {
+		err := validateFailpoint(clus, failpoint)
+		if err != nil {
+			continue
+		}
+		availableFailpoints = append(availableFailpoints, failpoint)
+	}
+	if len(availableFailpoints) == 0 {
+		t.Errorf("No available failpoints")
+		return nil
+	}
+	return availableFailpoints[rand.Int()%len(availableFailpoints)]
+}
+
+func validateFailpoint(clus *e2e.EtcdProcessCluster, failpoint Failpoint) error {
+	for _, proc := range clus.Procs {
+		if !failpoint.Available(*clus.Cfg, proc) {
+			return fmt.Errorf("failpoint %q not available on %s", failpoint.Name(), proc.Config().Name)
+		}
+	}
+	return nil
+}
+
 func injectFailpoints(ctx context.Context, t *testing.T, lg *zap.Logger, clus *e2e.EtcdProcessCluster, failpoint Failpoint) {
 	ctx, cancel := context.WithTimeout(ctx, triggerTimeout)
 	defer cancel()
 
 	var err error
 	successes := 0
 	failures := 0
-	for _, proc := range clus.Procs {
-		if !failpoint.Available(*clus.Cfg, proc) {
-			t.Errorf("Failpoint %q not available on %s", failpoint.Name(), proc.Config().Name)
-			return
-		}
-	}
 	for successes < failpointInjectionsCount && failures < failpointInjectionsRetries {
 		time.Sleep(waitBetweenFailpointTriggers)
 
@@ -230,10 +248,16 @@ func (f goPanicFailpoint) Inject(ctx context.Context, t *testing.T, lg *zap.Logg
 	member := f.pickMember(t, clus)
 
 	for member.IsRunning() {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+		}
 		lg.Info("Setting up gofailpoint", zap.String("failpoint", f.Name()))
 		err := member.Failpoints().Setup(ctx, f.failpoint, "panic")
 		if err != nil {
 			lg.Info("goFailpoint setup failed", zap.String("failpoint", f.Name()), zap.Error(err))
+			continue
 		}
 		if !member.IsRunning() {
 			// TODO: Check member logs that etcd not running is caused panic caused by proper gofailpoint.
@@ -255,11 +279,7 @@ func (f goPanicFailpoint) Inject(ctx context.Context, t *testing.T, lg *zap.Logg
 		lg.Info("Member exited as expected", zap.String("member", member.Config().Name))
 	}
 
-	err := member.Start(ctx)
-	if err != nil {
-		return err
-	}
-	return nil
+	return member.Start(ctx)
 }
 
 func (f goPanicFailpoint) pickMember(t *testing.T, clus *e2e.EtcdProcessCluster) e2e.EtcdProcess {
@@ -347,40 +367,6 @@ func (t triggerCompact) Available(e2e.EtcdProcessClusterConfig, e2e.EtcdProcess)
 	return true
 }
 
-type randomFailpoint struct {
-	failpoints []Failpoint
-}
-
-func (f randomFailpoint) Inject(ctx context.Context, t *testing.T, lg *zap.Logger, clus *e2e.EtcdProcessCluster) error {
-	availableFailpoints := make([]Failpoint, 0, len(f.failpoints))
-	for _, failpoint := range f.failpoints {
-		count := 0
-		for _, proc := range clus.Procs {
-			if failpoint.Available(*clus.Cfg, proc) {
-				count++
-			}
-		}
-		if count == len(clus.Procs) {
-			availableFailpoints = append(availableFailpoints, failpoint)
-		}
-	}
-	if len(availableFailpoints) == 0 {
-		t.Errorf("No available failpoints")
-		return nil
-	}
-	failpoint := availableFailpoints[rand.Int()%len(availableFailpoints)]
-	lg.Info("Triggering failpoint\n", zap.String("failpoint", failpoint.Name()))
-	return failpoint.Inject(ctx, t, lg, clus)
-}
-
-func (f randomFailpoint) Name() string {
-	return "Random"
-}
-
-func (f randomFailpoint) Available(e2e.EtcdProcessClusterConfig, e2e.EtcdProcess) bool {
-	return true
-}
-
 type blackholePeerNetworkFailpoint struct {
 	triggerBlackhole
 }
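The hunks above also change how goPanicFailpoint.Inject handles a setup attempt that does not stick: instead of giving up, it now continues retrying, and the non-blocking select on ctx.Done() bounds the loop by the triggerTimeout that injectFailpoints sets on the context. A stripped-down sketch of that pattern follows (the attemptSetup stub and the small back-off sleep are assumptions; the real loop is driven by member.IsRunning()):

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// attemptSetup stands in for member.Failpoints().Setup(...); it always fails
// here so the example exercises the retry-until-deadline path.
func attemptSetup(ctx context.Context) error {
	return errors.New("hypothetical setup failure")
}

// setupUntilDone retries indefinitely, but the non-blocking ctx.Done() check at
// the top of each iteration guarantees it stops once the deadline passes.
func setupUntilDone(ctx context.Context) error {
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}
		if err := attemptSetup(ctx); err != nil {
			time.Sleep(10 * time.Millisecond) // brief back-off before the next attempt
			continue
		}
		return nil
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
	defer cancel()
	fmt.Println(setupUntilDone(ctx)) // prints: context deadline exceeded
}

The remaining hunks adjust TestRobustness and testRobustness so that scenarios no longer hard-code failpoint: RandomFailpoint.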
@@ -39,9 +39,8 @@ func TestRobustness(t *testing.T) {
 	scenarios := []testScenario{}
 	for _, traffic := range []traffic.Config{traffic.LowTraffic, traffic.HighTraffic, traffic.KubernetesTraffic} {
 		scenarios = append(scenarios, testScenario{
 			name: traffic.Name + "ClusterOfSize1",
-			failpoint: RandomFailpoint,
 			traffic: traffic,
 			cluster: *e2e.NewConfig(
 				e2e.WithClusterSize(1),
 				e2e.WithSnapshotCount(100),
@@ -62,10 +61,9 @@
 			clusterOfSize3Options = append(clusterOfSize3Options, e2e.WithSnapshotCatchUpEntries(100))
 		}
 		scenarios = append(scenarios, testScenario{
 			name: traffic.Name + "ClusterOfSize3",
-			failpoint: RandomFailpoint,
 			traffic: traffic,
 			cluster: *e2e.NewConfig(clusterOfSize3Options...),
 		})
 	}
 	scenarios = append(scenarios, testScenario{
@@ -93,8 +91,7 @@
 		),
 	})
 	scenarios = append(scenarios, testScenario{
 		name: "Issue15220",
-		failpoint: RandomFailpoint,
 		watch: watchConfig{
 			requestProgress: true,
 		},
@@ -145,6 +142,15 @@ func testRobustness(ctx context.Context, t *testing.T, lg *zap.Logger, s testSce
 	}
 	defer r.clus.Close()
 
+	if s.failpoint == nil {
+		s.failpoint = pickRandomFailpoint(t, r.clus)
+	} else {
+		err = validateFailpoint(r.clus, s.failpoint)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+
 	// t.Failed() returns false during panicking. We need to forcibly
 	// save data on panicking.
 	// Refer to: https://github.com/golang/go/issues/49929
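To make the scenario-level effect concrete, here is a hedged sketch of the new selection flow using hypothetical stub types (the real testScenario, Failpoint, and cluster types live in the robustness test framework): a scenario that leaves failpoint unset gets a random failpoint that is available on every member, while an explicitly configured failpoint is validated up front so the test fails fast if it cannot be used.

package main

import (
	"fmt"
	"math/rand"
)

// Stub failpoint model: a name plus whether it is usable on the cluster under
// test (a stand-in for checking Available(clusterConfig, process) on every member).
type Failpoint struct {
	name      string
	available bool
}

type testScenario struct {
	name      string
	failpoint *Failpoint
}

var allFailpoints = []Failpoint{
	{name: "KillFailpoint", available: true},
	{name: "RaftBeforeSaveSnapPanic", available: false}, // e.g. targets a follower, so not usable everywhere
}

func validateFailpoint(f Failpoint) error {
	if !f.available {
		return fmt.Errorf("failpoint %q not available", f.name)
	}
	return nil
}

func pickRandomFailpoint() (*Failpoint, error) {
	available := make([]Failpoint, 0, len(allFailpoints))
	for _, f := range allFailpoints {
		if validateFailpoint(f) == nil {
			available = append(available, f)
		}
	}
	if len(available) == 0 {
		return nil, fmt.Errorf("no available failpoints")
	}
	return &available[rand.Intn(len(available))], nil
}

func setupScenario(s testScenario) (testScenario, error) {
	if s.failpoint == nil {
		// Scenario did not pin a failpoint: pick any one usable on this cluster.
		f, err := pickRandomFailpoint()
		if err != nil {
			return s, err
		}
		s.failpoint = f
	} else if err := validateFailpoint(*s.failpoint); err != nil {
		// A pinned failpoint that cannot run here fails the test early.
		return s, err
	}
	return s, nil
}

func main() {
	s, err := setupScenario(testScenario{name: "HighTrafficClusterOfSize1"})
	if err != nil {
		panic(err)
	}
	fmt.Printf("%s will inject %s\n", s.name, s.failpoint.name)
}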