From a5cfc089fad193ba79148969a057988fc5a2ca2d Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Mon, 5 Dec 2022 12:25:36 +0100 Subject: [PATCH 1/3] tests: Refactor picking a member function Signed-off-by: Marek Siarkowicz --- tests/linearizability/failpoints.go | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tests/linearizability/failpoints.go b/tests/linearizability/failpoints.go index 247951c19..23162b620 100644 --- a/tests/linearizability/failpoints.go +++ b/tests/linearizability/failpoints.go @@ -115,16 +115,9 @@ const ( ) func (f goFailpoint) Trigger(t *testing.T, ctx context.Context, clus *e2e.EtcdProcessCluster) error { - var member e2e.EtcdProcess - switch f.target { - case AnyMember: - member = clus.Procs[rand.Int()%len(clus.Procs)] - case Leader: - member = clus.Procs[clus.WaitLeader(t)] - default: - panic("unknown target") - } + member := f.pickMember(t, clus) address := fmt.Sprintf("127.0.0.1:%d", member.Config().GoFailPort) + err := setupGoFailpoint(address, f.failpoint, f.payload) if err != nil { return fmt.Errorf("gofailpoint setup failed: %w", err) @@ -146,6 +139,17 @@ func (f goFailpoint) Trigger(t *testing.T, ctx context.Context, clus *e2e.EtcdPr return nil } +func (f goFailpoint) pickMember(t *testing.T, clus *e2e.EtcdProcessCluster) e2e.EtcdProcess { + switch f.target { + case AnyMember: + return clus.Procs[rand.Int()%len(clus.Procs)] + case Leader: + return clus.Procs[clus.WaitLeader(t)] + default: + panic("unknown target") + } +} + func setupGoFailpoint(host, failpoint, payload string) error { failpointUrl := url.URL{ Scheme: "http", From 738ee3687ae0a3c595d22dd45dd6a4b8acd035cd Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Mon, 5 Dec 2022 13:52:30 +0100 Subject: [PATCH 2/3] tests: Allow failpoint requests to fail assuming that process exits within 1 second Signed-off-by: Marek Siarkowicz --- tests/framework/e2e/cluster_proxy.go | 8 +++- tests/framework/e2e/etcd_process.go | 33 
+++++++++++++-- tests/linearizability/failpoints.go | 60 ++++++++++++++++++---------- 3 files changed, 73 insertions(+), 28 deletions(-) diff --git a/tests/framework/e2e/cluster_proxy.go b/tests/framework/e2e/cluster_proxy.go index 98e563524..4e387410b 100644 --- a/tests/framework/e2e/cluster_proxy.go +++ b/tests/framework/e2e/cluster_proxy.go @@ -116,8 +116,12 @@ func (p *proxyEtcdProcess) Kill() error { return p.etcdProc.Kill() } -func (p *proxyEtcdProcess) Wait() error { - return p.etcdProc.Wait() +func (p *proxyEtcdProcess) IsRunning() bool { + return p.etcdProc.IsRunning() +} + +func (p *proxyEtcdProcess) Wait(ctx context.Context) error { + return p.etcdProc.Wait(ctx) } type proxyProc struct { diff --git a/tests/framework/e2e/etcd_process.go b/tests/framework/e2e/etcd_process.go index 070a77c4d..d5238b5a2 100644 --- a/tests/framework/e2e/etcd_process.go +++ b/tests/framework/e2e/etcd_process.go @@ -42,7 +42,8 @@ type EtcdProcess interface { EndpointsMetrics() []string Client(opts ...config.ClientOption) *EtcdctlV3 - Wait() error + IsRunning() bool + Wait(ctx context.Context) error Start(ctx context.Context) error Restart(ctx context.Context) error Stop() error @@ -201,11 +202,35 @@ func (ep *EtcdServerProcess) Kill() error { return ep.proc.Signal(syscall.SIGKILL) } -func (ep *EtcdServerProcess) Wait() error { - ep.proc.Wait() +func (ep *EtcdServerProcess) Wait(ctx context.Context) error { + ch := make(chan struct{}) + go func() { + defer close(ch) + if ep.proc != nil { + ep.proc.Wait() + ep.cfg.lg.Info("server exited", zap.String("name", ep.cfg.Name)) + } + }() + select { + case <-ch: + ep.proc = nil + return nil + case <-ctx.Done(): + return ctx.Err() + } +} + +func (ep *EtcdServerProcess) IsRunning() bool { + if ep.proc == nil { + return false + } + _, err := ep.proc.ExitCode() + if err == expect.ErrProcessRunning { + return true + } ep.cfg.lg.Info("server exited", zap.String("name", ep.cfg.Name)) ep.proc = nil - return nil + return false } func 
AssertProcessLogs(t *testing.T, ep EtcdProcess, expectLog string) { diff --git a/tests/linearizability/failpoints.go b/tests/linearizability/failpoints.go index 23162b620..77297dc3f 100644 --- a/tests/linearizability/failpoints.go +++ b/tests/linearizability/failpoints.go @@ -31,6 +31,10 @@ import ( "go.etcd.io/etcd/tests/v3/framework/e2e" ) +const ( + triggerTimeout = time.Second +) + var ( KillFailpoint Failpoint = killFailpoint{} DefragBeforeCopyPanic Failpoint = goFailpoint{"backend/defragBeforeCopy", "panic", triggerDefrag, AnyMember} @@ -81,15 +85,21 @@ type killFailpoint struct{} func (f killFailpoint) Trigger(t *testing.T, ctx context.Context, clus *e2e.EtcdProcessCluster) error { member := clus.Procs[rand.Int()%len(clus.Procs)] - err := member.Kill() - if err != nil { - return err + + killCtx, cancel := context.WithTimeout(ctx, triggerTimeout) + defer cancel() + for member.IsRunning() { + err := member.Kill() + if err != nil { + t.Logf("sending kill signal failed: %v", err) + } + err = member.Wait(killCtx) + if err != nil && !strings.Contains(err.Error(), "unexpected exit code") { + return fmt.Errorf("failed to kill the process within %s, err: %w", triggerTimeout, err) + } } - err = member.Wait() - if err != nil && !strings.Contains(err.Error(), "unexpected exit code") { - return err - } - err = member.Start(ctx) + + err := member.Start(ctx) if err != nil { return err } @@ -118,21 +128,27 @@ func (f goFailpoint) Trigger(t *testing.T, ctx context.Context, clus *e2e.EtcdPr member := f.pickMember(t, clus) address := fmt.Sprintf("127.0.0.1:%d", member.Config().GoFailPort) - err := setupGoFailpoint(address, f.failpoint, f.payload) - if err != nil { - return fmt.Errorf("gofailpoint setup failed: %w", err) - } - if f.trigger != nil { - err = f.trigger(ctx, member) + triggerCtx, cancel := context.WithTimeout(ctx, triggerTimeout) + defer cancel() + + for member.IsRunning() { + err := setupGoFailpoint(triggerCtx, address, f.failpoint, f.payload) if err != nil { - 
return fmt.Errorf("triggering gofailpoint failed: %w", err) + t.Logf("gofailpoint setup failed: %v", err) + } + if f.trigger != nil { + err = f.trigger(triggerCtx, member) + if err != nil { + t.Logf("triggering gofailpoint failed: %v", err) + } + } + err = member.Wait(triggerCtx) + if err != nil && !strings.Contains(err.Error(), "unexpected exit code") { + return fmt.Errorf("failed to trigger a process panic within %s, err: %w", triggerTimeout, err) } } - err = member.Wait() - if err != nil && !strings.Contains(err.Error(), "unexpected exit code") { - return err - } - err = member.Start(ctx) + + err := member.Start(ctx) if err != nil { return err } @@ -150,13 +166,13 @@ func (f goFailpoint) pickMember(t *testing.T, clus *e2e.EtcdProcessCluster) e2e. } } -func setupGoFailpoint(host, failpoint, payload string) error { +func setupGoFailpoint(ctx context.Context, host, failpoint, payload string) error { failpointUrl := url.URL{ Scheme: "http", Host: host, Path: failpoint, } - r, err := http.NewRequest("PUT", failpointUrl.String(), bytes.NewBuffer([]byte(payload))) + r, err := http.NewRequestWithContext(ctx, "PUT", failpointUrl.String(), bytes.NewBuffer([]byte(payload))) if err != nil { return err } From f2bc0823f6f2b9661c3ba6b47b2d0c653af3a840 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Tue, 6 Dec 2022 09:07:43 +0100 Subject: [PATCH 3/3] tests: Make it explicit that we only support panic failpoints Signed-off-by: Marek Siarkowicz --- tests/linearizability/failpoints.go | 61 ++++++++++++++--------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/tests/linearizability/failpoints.go b/tests/linearizability/failpoints.go index 77297dc3f..81e391852 100644 --- a/tests/linearizability/failpoints.go +++ b/tests/linearizability/failpoints.go @@ -37,25 +37,25 @@ const ( var ( KillFailpoint Failpoint = killFailpoint{} - DefragBeforeCopyPanic Failpoint = goFailpoint{"backend/defragBeforeCopy", "panic", triggerDefrag, AnyMember} - 
DefragBeforeRenamePanic Failpoint = goFailpoint{"backend/defragBeforeRename", "panic", triggerDefrag, AnyMember} - BeforeCommitPanic Failpoint = goFailpoint{"backend/beforeCommit", "panic", nil, AnyMember} - AfterCommitPanic Failpoint = goFailpoint{"backend/afterCommit", "panic", nil, AnyMember} - RaftBeforeSavePanic Failpoint = goFailpoint{"etcdserver/raftBeforeSave", "panic", nil, AnyMember} - RaftAfterSavePanic Failpoint = goFailpoint{"etcdserver/raftAfterSave", "panic", nil, AnyMember} - BackendBeforePreCommitHookPanic Failpoint = goFailpoint{"backend/commitBeforePreCommitHook", "panic", nil, AnyMember} - BackendAfterPreCommitHookPanic Failpoint = goFailpoint{"backend/commitAfterPreCommitHook", "panic", nil, AnyMember} - BackendBeforeStartDBTxnPanic Failpoint = goFailpoint{"backend/beforeStartDBTxn", "panic", nil, AnyMember} - BackendAfterStartDBTxnPanic Failpoint = goFailpoint{"backend/afterStartDBTxn", "panic", nil, AnyMember} - BackendBeforeWritebackBufPanic Failpoint = goFailpoint{"backend/beforeWritebackBuf", "panic", nil, AnyMember} - BackendAfterWritebackBufPanic Failpoint = goFailpoint{"backend/afterWritebackBuf", "panic", nil, AnyMember} - CompactBeforeCommitScheduledCompactPanic Failpoint = goFailpoint{"mvcc/compactBeforeCommitScheduledCompact", "panic", triggerCompact, AnyMember} - CompactAfterCommitScheduledCompactPanic Failpoint = goFailpoint{"mvcc/compactAfterCommitScheduledCompact", "panic", triggerCompact, AnyMember} - CompactBeforeSetFinishedCompactPanic Failpoint = goFailpoint{"mvcc/compactBeforeSetFinishedCompact", "panic", triggerCompact, AnyMember} - CompactAfterSetFinishedCompactPanic Failpoint = goFailpoint{"mvcc/compactAfterSetFinishedCompact", "panic", triggerCompact, AnyMember} - CompactBeforeCommitBatchPanic Failpoint = goFailpoint{"mvcc/compactBeforeCommitBatch", "panic", triggerCompact, AnyMember} - CompactAfterCommitBatchPanic Failpoint = goFailpoint{"mvcc/compactAfterCommitBatch", "panic", triggerCompact, AnyMember} - 
RaftBeforeLeaderSendPanic Failpoint = goFailpoint{"etcdserver/raftBeforeLeaderSend", "panic", nil, Leader} + DefragBeforeCopyPanic Failpoint = goPanicFailpoint{"backend/defragBeforeCopy", triggerDefrag, AnyMember} + DefragBeforeRenamePanic Failpoint = goPanicFailpoint{"backend/defragBeforeRename", triggerDefrag, AnyMember} + BeforeCommitPanic Failpoint = goPanicFailpoint{"backend/beforeCommit", nil, AnyMember} + AfterCommitPanic Failpoint = goPanicFailpoint{"backend/afterCommit", nil, AnyMember} + RaftBeforeSavePanic Failpoint = goPanicFailpoint{"etcdserver/raftBeforeSave", nil, AnyMember} + RaftAfterSavePanic Failpoint = goPanicFailpoint{"etcdserver/raftAfterSave", nil, AnyMember} + BackendBeforePreCommitHookPanic Failpoint = goPanicFailpoint{"backend/commitBeforePreCommitHook", nil, AnyMember} + BackendAfterPreCommitHookPanic Failpoint = goPanicFailpoint{"backend/commitAfterPreCommitHook", nil, AnyMember} + BackendBeforeStartDBTxnPanic Failpoint = goPanicFailpoint{"backend/beforeStartDBTxn", nil, AnyMember} + BackendAfterStartDBTxnPanic Failpoint = goPanicFailpoint{"backend/afterStartDBTxn", nil, AnyMember} + BackendBeforeWritebackBufPanic Failpoint = goPanicFailpoint{"backend/beforeWritebackBuf", nil, AnyMember} + BackendAfterWritebackBufPanic Failpoint = goPanicFailpoint{"backend/afterWritebackBuf", nil, AnyMember} + CompactBeforeCommitScheduledCompactPanic Failpoint = goPanicFailpoint{"mvcc/compactBeforeCommitScheduledCompact", triggerCompact, AnyMember} + CompactAfterCommitScheduledCompactPanic Failpoint = goPanicFailpoint{"mvcc/compactAfterCommitScheduledCompact", triggerCompact, AnyMember} + CompactBeforeSetFinishedCompactPanic Failpoint = goPanicFailpoint{"mvcc/compactBeforeSetFinishedCompact", triggerCompact, AnyMember} + CompactAfterSetFinishedCompactPanic Failpoint = goPanicFailpoint{"mvcc/compactAfterSetFinishedCompact", triggerCompact, AnyMember} + CompactBeforeCommitBatchPanic Failpoint = goPanicFailpoint{"mvcc/compactBeforeCommitBatch", 
triggerCompact, AnyMember} + CompactAfterCommitBatchPanic Failpoint = goPanicFailpoint{"mvcc/compactAfterCommitBatch", triggerCompact, AnyMember} + RaftBeforeLeaderSendPanic Failpoint = goPanicFailpoint{"etcdserver/raftBeforeLeaderSend", nil, Leader} RandomFailpoint Failpoint = randomFailpoint{[]Failpoint{ KillFailpoint, BeforeCommitPanic, AfterCommitPanic, RaftBeforeSavePanic, RaftAfterSavePanic, DefragBeforeCopyPanic, DefragBeforeRenamePanic, @@ -68,12 +68,12 @@ var ( RaftBeforeLeaderSendPanic, }} // TODO: Figure out how to reliably trigger below failpoints and add them to RandomFailpoint - raftBeforeApplySnapPanic Failpoint = goFailpoint{"etcdserver/raftBeforeApplySnap", "panic", nil, AnyMember} - raftAfterApplySnapPanic Failpoint = goFailpoint{"etcdserver/raftAfterApplySnap", "panic", nil, AnyMember} - raftAfterWALReleasePanic Failpoint = goFailpoint{"etcdserver/raftAfterWALRelease", "panic", nil, AnyMember} - raftBeforeFollowerSendPanic Failpoint = goFailpoint{"etcdserver/raftBeforeFollowerSend", "panic", nil, AnyMember} - raftBeforeSaveSnapPanic Failpoint = goFailpoint{"etcdserver/raftBeforeSaveSnap", "panic", nil, AnyMember} - raftAfterSaveSnapPanic Failpoint = goFailpoint{"etcdserver/raftAfterSaveSnap", "panic", nil, AnyMember} + raftBeforeApplySnapPanic Failpoint = goPanicFailpoint{"etcdserver/raftBeforeApplySnap", nil, AnyMember} + raftAfterApplySnapPanic Failpoint = goPanicFailpoint{"etcdserver/raftAfterApplySnap", nil, AnyMember} + raftAfterWALReleasePanic Failpoint = goPanicFailpoint{"etcdserver/raftAfterWALRelease", nil, AnyMember} + raftBeforeFollowerSendPanic Failpoint = goPanicFailpoint{"etcdserver/raftBeforeFollowerSend", nil, AnyMember} + raftBeforeSaveSnapPanic Failpoint = goPanicFailpoint{"etcdserver/raftBeforeSaveSnap", nil, AnyMember} + raftAfterSaveSnapPanic Failpoint = goPanicFailpoint{"etcdserver/raftAfterSaveSnap", nil, AnyMember} ) type Failpoint interface { @@ -110,9 +110,8 @@ func (f killFailpoint) Name() string { return "Kill" } -type 
goFailpoint struct { +type goPanicFailpoint struct { failpoint string - payload string trigger func(ctx context.Context, member e2e.EtcdProcess) error target failpointTarget } @@ -124,7 +123,7 @@ const ( Leader failpointTarget = "Leader" ) -func (f goFailpoint) Trigger(t *testing.T, ctx context.Context, clus *e2e.EtcdProcessCluster) error { +func (f goPanicFailpoint) Trigger(t *testing.T, ctx context.Context, clus *e2e.EtcdProcessCluster) error { member := f.pickMember(t, clus) address := fmt.Sprintf("127.0.0.1:%d", member.Config().GoFailPort) @@ -132,7 +131,7 @@ func (f goFailpoint) Trigger(t *testing.T, ctx context.Context, clus *e2e.EtcdPr defer cancel() for member.IsRunning() { - err := setupGoFailpoint(triggerCtx, address, f.failpoint, f.payload) + err := setupGoFailpoint(triggerCtx, address, f.failpoint, "panic") if err != nil { t.Logf("gofailpoint setup failed: %v", err) } @@ -155,7 +154,7 @@ func (f goFailpoint) Trigger(t *testing.T, ctx context.Context, clus *e2e.EtcdPr return nil } -func (f goFailpoint) pickMember(t *testing.T, clus *e2e.EtcdProcessCluster) e2e.EtcdProcess { +func (f goPanicFailpoint) pickMember(t *testing.T, clus *e2e.EtcdProcessCluster) e2e.EtcdProcess { switch f.target { case AnyMember: return clus.Procs[rand.Int()%len(clus.Procs)] @@ -187,7 +186,7 @@ func setupGoFailpoint(ctx context.Context, host, failpoint, payload string) erro return nil } -func (f goFailpoint) Name() string { +func (f goPanicFailpoint) Name() string { return f.failpoint }