From 827dc18682d739eb492d34675a656ea081d3a4fe Mon Sep 17 00:00:00 2001 From: ZhouJianMS Date: Fri, 3 Nov 2023 15:53:29 +0800 Subject: [PATCH] Add IO stall failpoint in raft loop Signed-off-by: ZhouJianMS --- tests/framework/e2e/etcd_process.go | 22 ++++++++++++++ tests/robustness/failpoint/failpoint.go | 2 ++ tests/robustness/failpoint/gofail.go | 40 +++++++++++++++++++++++++ 3 files changed, 64 insertions(+) diff --git a/tests/framework/e2e/etcd_process.go b/tests/framework/e2e/etcd_process.go index cb8f4a20b..544b42ea6 100644 --- a/tests/framework/e2e/etcd_process.go +++ b/tests/framework/e2e/etcd_process.go @@ -369,6 +369,28 @@ func (f *BinaryFailpoints) SetupHTTP(ctx context.Context, failpoint, payload str return nil } +func (f *BinaryFailpoints) DeactivateHTTP(ctx context.Context, failpoint string) error { + host := fmt.Sprintf("127.0.0.1:%d", f.member.Config().GoFailPort) + failpointUrl := url.URL{ + Scheme: "http", + Host: host, + Path: failpoint, + } + r, err := http.NewRequestWithContext(ctx, "DELETE", failpointUrl.String(), nil) + if err != nil { + return err + } + resp, err := httpClient.Do(r) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusNoContent { + return fmt.Errorf("bad status code: %d", resp.StatusCode) + } + return nil +} + var httpClient = http.Client{ Timeout: 10 * time.Millisecond, } diff --git a/tests/robustness/failpoint/failpoint.go b/tests/robustness/failpoint/failpoint.go index 568a38f8a..4109973f4 100644 --- a/tests/robustness/failpoint/failpoint.go +++ b/tests/robustness/failpoint/failpoint.go @@ -48,6 +48,8 @@ var ( BeforeApplyOneConfChangeSleep, MemberReplace, DropPeerNetwork, + RaftBeforeSaveSleep, + RaftAfterSaveSleep, } ) diff --git a/tests/robustness/failpoint/gofail.go b/tests/robustness/failpoint/gofail.go index 6c1e5e63a..c42182e83 100644 --- a/tests/robustness/failpoint/gofail.go +++ b/tests/robustness/failpoint/gofail.go @@ -54,6 +54,8 @@ var ( RaftBeforeSaveSnapPanic Failpoint = goPanicFailpoint{"raftBeforeSaveSnap", triggerBlackhole{waitTillSnapshot: true}, Follower} RaftAfterSaveSnapPanic Failpoint = goPanicFailpoint{"raftAfterSaveSnap", triggerBlackhole{waitTillSnapshot: true}, Follower} BeforeApplyOneConfChangeSleep Failpoint = killAndGofailSleep{"beforeApplyOneConfChange", time.Second} + RaftBeforeSaveSleep Failpoint = gofailSleepAndDeactivate{"raftBeforeSave", time.Second} + RaftAfterSaveSleep Failpoint = gofailSleepAndDeactivate{"raftAfterSave", time.Second} ) type goPanicFailpoint struct { @@ -189,3 +191,41 @@ func (f killAndGofailSleep) Available(config e2e.EtcdProcessClusterConfig, membe } return memberFailpoints.Available(f.failpoint) } + +type gofailSleepAndDeactivate struct { + failpoint string + time time.Duration +} + +func (f gofailSleepAndDeactivate) Inject(ctx context.Context, t *testing.T, lg *zap.Logger, clus *e2e.EtcdProcessCluster) error { + member := clus.Procs[rand.Int()%len(clus.Procs)] + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + lg.Info("Setting up gofailpoint", zap.String("failpoint", f.Name())) + err := member.Failpoints().SetupHTTP(ctx, f.failpoint, fmt.Sprintf(`sleep(%q)`, f.time)) + if err != nil { + lg.Info("goFailpoint setup failed", zap.String("failpoint", f.Name()), zap.Error(err)) + } + time.Sleep(f.time) + lg.Info("Deactivating gofailpoint", zap.String("failpoint", f.Name())) + err = member.Failpoints().DeactivateHTTP(ctx, f.failpoint) + if err != nil { + lg.Info("goFailpoint deactivate failed", zap.String("failpoint", f.Name()), zap.Error(err)) + } + return nil +} + +func (f gofailSleepAndDeactivate) Name() string { + return fmt.Sprintf("%s=sleep(%s)", f.failpoint, f.time) +} + +func (f gofailSleepAndDeactivate) Available(config e2e.EtcdProcessClusterConfig, member e2e.EtcdProcess) bool { + memberFailpoints := member.Failpoints() + if memberFailpoints == nil { + return false + } + return memberFailpoints.Available(f.failpoint) +}