Merge pull request #14897 from serathius/linearizability-trigger-exit

Allow failpoint requests to fail, assuming that the process exits within 1 second
This commit is contained in:
Marek Siarkowicz 2022-12-06 09:31:21 +01:00 committed by GitHub
commit 42bb543315
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 114 additions and 66 deletions

View File

@ -116,8 +116,12 @@ func (p *proxyEtcdProcess) Kill() error {
return p.etcdProc.Kill() return p.etcdProc.Kill()
} }
func (p *proxyEtcdProcess) Wait() error { func (p *proxyEtcdProcess) IsRunning() bool {
return p.etcdProc.Wait() return p.etcdProc.IsRunning()
}
func (p *proxyEtcdProcess) Wait(ctx context.Context) error {
return p.etcdProc.Wait(ctx)
} }
type proxyProc struct { type proxyProc struct {

View File

@ -42,7 +42,8 @@ type EtcdProcess interface {
EndpointsMetrics() []string EndpointsMetrics() []string
Client(opts ...config.ClientOption) *EtcdctlV3 Client(opts ...config.ClientOption) *EtcdctlV3
Wait() error IsRunning() bool
Wait(ctx context.Context) error
Start(ctx context.Context) error Start(ctx context.Context) error
Restart(ctx context.Context) error Restart(ctx context.Context) error
Stop() error Stop() error
@ -201,11 +202,35 @@ func (ep *EtcdServerProcess) Kill() error {
return ep.proc.Signal(syscall.SIGKILL) return ep.proc.Signal(syscall.SIGKILL)
} }
func (ep *EtcdServerProcess) Wait() error { func (ep *EtcdServerProcess) Wait(ctx context.Context) error {
ep.proc.Wait() ch := make(chan struct{})
go func() {
defer close(ch)
if ep.proc != nil {
ep.proc.Wait()
ep.cfg.lg.Info("server exited", zap.String("name", ep.cfg.Name))
}
}()
select {
case <-ch:
ep.proc = nil
return nil
case <-ctx.Done():
return ctx.Err()
}
}
func (ep *EtcdServerProcess) IsRunning() bool {
if ep.proc == nil {
return false
}
_, err := ep.proc.ExitCode()
if err == expect.ErrProcessRunning {
return true
}
ep.cfg.lg.Info("server exited", zap.String("name", ep.cfg.Name)) ep.cfg.lg.Info("server exited", zap.String("name", ep.cfg.Name))
ep.proc = nil ep.proc = nil
return nil return false
} }
func AssertProcessLogs(t *testing.T, ep EtcdProcess, expectLog string) { func AssertProcessLogs(t *testing.T, ep EtcdProcess, expectLog string) {

View File

@ -31,27 +31,31 @@ import (
"go.etcd.io/etcd/tests/v3/framework/e2e" "go.etcd.io/etcd/tests/v3/framework/e2e"
) )
const (
triggerTimeout = time.Second
)
var ( var (
KillFailpoint Failpoint = killFailpoint{} KillFailpoint Failpoint = killFailpoint{}
DefragBeforeCopyPanic Failpoint = goFailpoint{"backend/defragBeforeCopy", "panic", triggerDefrag, AnyMember} DefragBeforeCopyPanic Failpoint = goPanicFailpoint{"backend/defragBeforeCopy", triggerDefrag, AnyMember}
DefragBeforeRenamePanic Failpoint = goFailpoint{"backend/defragBeforeRename", "panic", triggerDefrag, AnyMember} DefragBeforeRenamePanic Failpoint = goPanicFailpoint{"backend/defragBeforeRename", triggerDefrag, AnyMember}
BeforeCommitPanic Failpoint = goFailpoint{"backend/beforeCommit", "panic", nil, AnyMember} BeforeCommitPanic Failpoint = goPanicFailpoint{"backend/beforeCommit", nil, AnyMember}
AfterCommitPanic Failpoint = goFailpoint{"backend/afterCommit", "panic", nil, AnyMember} AfterCommitPanic Failpoint = goPanicFailpoint{"backend/afterCommit", nil, AnyMember}
RaftBeforeSavePanic Failpoint = goFailpoint{"etcdserver/raftBeforeSave", "panic", nil, AnyMember} RaftBeforeSavePanic Failpoint = goPanicFailpoint{"etcdserver/raftBeforeSave", nil, AnyMember}
RaftAfterSavePanic Failpoint = goFailpoint{"etcdserver/raftAfterSave", "panic", nil, AnyMember} RaftAfterSavePanic Failpoint = goPanicFailpoint{"etcdserver/raftAfterSave", nil, AnyMember}
BackendBeforePreCommitHookPanic Failpoint = goFailpoint{"backend/commitBeforePreCommitHook", "panic", nil, AnyMember} BackendBeforePreCommitHookPanic Failpoint = goPanicFailpoint{"backend/commitBeforePreCommitHook", nil, AnyMember}
BackendAfterPreCommitHookPanic Failpoint = goFailpoint{"backend/commitAfterPreCommitHook", "panic", nil, AnyMember} BackendAfterPreCommitHookPanic Failpoint = goPanicFailpoint{"backend/commitAfterPreCommitHook", nil, AnyMember}
BackendBeforeStartDBTxnPanic Failpoint = goFailpoint{"backend/beforeStartDBTxn", "panic", nil, AnyMember} BackendBeforeStartDBTxnPanic Failpoint = goPanicFailpoint{"backend/beforeStartDBTxn", nil, AnyMember}
BackendAfterStartDBTxnPanic Failpoint = goFailpoint{"backend/afterStartDBTxn", "panic", nil, AnyMember} BackendAfterStartDBTxnPanic Failpoint = goPanicFailpoint{"backend/afterStartDBTxn", nil, AnyMember}
BackendBeforeWritebackBufPanic Failpoint = goFailpoint{"backend/beforeWritebackBuf", "panic", nil, AnyMember} BackendBeforeWritebackBufPanic Failpoint = goPanicFailpoint{"backend/beforeWritebackBuf", nil, AnyMember}
BackendAfterWritebackBufPanic Failpoint = goFailpoint{"backend/afterWritebackBuf", "panic", nil, AnyMember} BackendAfterWritebackBufPanic Failpoint = goPanicFailpoint{"backend/afterWritebackBuf", nil, AnyMember}
CompactBeforeCommitScheduledCompactPanic Failpoint = goFailpoint{"mvcc/compactBeforeCommitScheduledCompact", "panic", triggerCompact, AnyMember} CompactBeforeCommitScheduledCompactPanic Failpoint = goPanicFailpoint{"mvcc/compactBeforeCommitScheduledCompact", triggerCompact, AnyMember}
CompactAfterCommitScheduledCompactPanic Failpoint = goFailpoint{"mvcc/compactAfterCommitScheduledCompact", "panic", triggerCompact, AnyMember} CompactAfterCommitScheduledCompactPanic Failpoint = goPanicFailpoint{"mvcc/compactAfterCommitScheduledCompact", triggerCompact, AnyMember}
CompactBeforeSetFinishedCompactPanic Failpoint = goFailpoint{"mvcc/compactBeforeSetFinishedCompact", "panic", triggerCompact, AnyMember} CompactBeforeSetFinishedCompactPanic Failpoint = goPanicFailpoint{"mvcc/compactBeforeSetFinishedCompact", triggerCompact, AnyMember}
CompactAfterSetFinishedCompactPanic Failpoint = goFailpoint{"mvcc/compactAfterSetFinishedCompact", "panic", triggerCompact, AnyMember} CompactAfterSetFinishedCompactPanic Failpoint = goPanicFailpoint{"mvcc/compactAfterSetFinishedCompact", triggerCompact, AnyMember}
CompactBeforeCommitBatchPanic Failpoint = goFailpoint{"mvcc/compactBeforeCommitBatch", "panic", triggerCompact, AnyMember} CompactBeforeCommitBatchPanic Failpoint = goPanicFailpoint{"mvcc/compactBeforeCommitBatch", triggerCompact, AnyMember}
CompactAfterCommitBatchPanic Failpoint = goFailpoint{"mvcc/compactAfterCommitBatch", "panic", triggerCompact, AnyMember} CompactAfterCommitBatchPanic Failpoint = goPanicFailpoint{"mvcc/compactAfterCommitBatch", triggerCompact, AnyMember}
RaftBeforeLeaderSendPanic Failpoint = goFailpoint{"etcdserver/raftBeforeLeaderSend", "panic", nil, Leader} RaftBeforeLeaderSendPanic Failpoint = goPanicFailpoint{"etcdserver/raftBeforeLeaderSend", nil, Leader}
RandomFailpoint Failpoint = randomFailpoint{[]Failpoint{ RandomFailpoint Failpoint = randomFailpoint{[]Failpoint{
KillFailpoint, BeforeCommitPanic, AfterCommitPanic, RaftBeforeSavePanic, KillFailpoint, BeforeCommitPanic, AfterCommitPanic, RaftBeforeSavePanic,
RaftAfterSavePanic, DefragBeforeCopyPanic, DefragBeforeRenamePanic, RaftAfterSavePanic, DefragBeforeCopyPanic, DefragBeforeRenamePanic,
@ -64,12 +68,12 @@ var (
RaftBeforeLeaderSendPanic, RaftBeforeLeaderSendPanic,
}} }}
// TODO: Figure out how to reliably trigger below failpoints and add them to RandomFailpoint // TODO: Figure out how to reliably trigger below failpoints and add them to RandomFailpoint
raftBeforeApplySnapPanic Failpoint = goFailpoint{"etcdserver/raftBeforeApplySnap", "panic", nil, AnyMember} raftBeforeApplySnapPanic Failpoint = goPanicFailpoint{"etcdserver/raftBeforeApplySnap", nil, AnyMember}
raftAfterApplySnapPanic Failpoint = goFailpoint{"etcdserver/raftAfterApplySnap", "panic", nil, AnyMember} raftAfterApplySnapPanic Failpoint = goPanicFailpoint{"etcdserver/raftAfterApplySnap", nil, AnyMember}
raftAfterWALReleasePanic Failpoint = goFailpoint{"etcdserver/raftAfterWALRelease", "panic", nil, AnyMember} raftAfterWALReleasePanic Failpoint = goPanicFailpoint{"etcdserver/raftAfterWALRelease", nil, AnyMember}
raftBeforeFollowerSendPanic Failpoint = goFailpoint{"etcdserver/raftBeforeFollowerSend", "panic", nil, AnyMember} raftBeforeFollowerSendPanic Failpoint = goPanicFailpoint{"etcdserver/raftBeforeFollowerSend", nil, AnyMember}
raftBeforeSaveSnapPanic Failpoint = goFailpoint{"etcdserver/raftBeforeSaveSnap", "panic", nil, AnyMember} raftBeforeSaveSnapPanic Failpoint = goPanicFailpoint{"etcdserver/raftBeforeSaveSnap", nil, AnyMember}
raftAfterSaveSnapPanic Failpoint = goFailpoint{"etcdserver/raftAfterSaveSnap", "panic", nil, AnyMember} raftAfterSaveSnapPanic Failpoint = goPanicFailpoint{"etcdserver/raftAfterSaveSnap", nil, AnyMember}
) )
type Failpoint interface { type Failpoint interface {
@ -81,15 +85,21 @@ type killFailpoint struct{}
func (f killFailpoint) Trigger(t *testing.T, ctx context.Context, clus *e2e.EtcdProcessCluster) error { func (f killFailpoint) Trigger(t *testing.T, ctx context.Context, clus *e2e.EtcdProcessCluster) error {
member := clus.Procs[rand.Int()%len(clus.Procs)] member := clus.Procs[rand.Int()%len(clus.Procs)]
err := member.Kill()
if err != nil { killCtx, cancel := context.WithTimeout(ctx, triggerTimeout)
return err defer cancel()
for member.IsRunning() {
err := member.Kill()
if err != nil {
t.Logf("sending kill signal failed: %v", err)
}
err = member.Wait(killCtx)
if err != nil && !strings.Contains(err.Error(), "unexpected exit code") {
return fmt.Errorf("failed to kill the process within %s, err: %w", triggerTimeout, err)
}
} }
err = member.Wait()
if err != nil && !strings.Contains(err.Error(), "unexpected exit code") { err := member.Start(ctx)
return err
}
err = member.Start(ctx)
if err != nil { if err != nil {
return err return err
} }
@ -100,9 +110,8 @@ func (f killFailpoint) Name() string {
return "Kill" return "Kill"
} }
type goFailpoint struct { type goPanicFailpoint struct {
failpoint string failpoint string
payload string
trigger func(ctx context.Context, member e2e.EtcdProcess) error trigger func(ctx context.Context, member e2e.EtcdProcess) error
target failpointTarget target failpointTarget
} }
@ -114,45 +123,55 @@ const (
Leader failpointTarget = "Leader" Leader failpointTarget = "Leader"
) )
func (f goFailpoint) Trigger(t *testing.T, ctx context.Context, clus *e2e.EtcdProcessCluster) error { func (f goPanicFailpoint) Trigger(t *testing.T, ctx context.Context, clus *e2e.EtcdProcessCluster) error {
var member e2e.EtcdProcess member := f.pickMember(t, clus)
switch f.target {
case AnyMember:
member = clus.Procs[rand.Int()%len(clus.Procs)]
case Leader:
member = clus.Procs[clus.WaitLeader(t)]
default:
panic("unknown target")
}
address := fmt.Sprintf("127.0.0.1:%d", member.Config().GoFailPort) address := fmt.Sprintf("127.0.0.1:%d", member.Config().GoFailPort)
err := setupGoFailpoint(address, f.failpoint, f.payload)
if err != nil { triggerCtx, cancel := context.WithTimeout(ctx, triggerTimeout)
return fmt.Errorf("gofailpoint setup failed: %w", err) defer cancel()
}
if f.trigger != nil { for member.IsRunning() {
err = f.trigger(ctx, member) err := setupGoFailpoint(triggerCtx, address, f.failpoint, "panic")
if err != nil { if err != nil {
return fmt.Errorf("triggering gofailpoint failed: %w", err) t.Logf("gofailpoint setup failed: %v", err)
}
if f.trigger != nil {
err = f.trigger(triggerCtx, member)
if err != nil {
t.Logf("triggering gofailpoint failed: %v", err)
}
}
err = member.Wait(triggerCtx)
if err != nil && !strings.Contains(err.Error(), "unexpected exit code") {
return fmt.Errorf("failed to trigger a process panic within %s, err: %w", triggerTimeout, err)
} }
} }
err = member.Wait()
if err != nil && !strings.Contains(err.Error(), "unexpected exit code") { err := member.Start(ctx)
return err
}
err = member.Start(ctx)
if err != nil { if err != nil {
return err return err
} }
return nil return nil
} }
func setupGoFailpoint(host, failpoint, payload string) error { func (f goPanicFailpoint) pickMember(t *testing.T, clus *e2e.EtcdProcessCluster) e2e.EtcdProcess {
switch f.target {
case AnyMember:
return clus.Procs[rand.Int()%len(clus.Procs)]
case Leader:
return clus.Procs[clus.WaitLeader(t)]
default:
panic("unknown target")
}
}
func setupGoFailpoint(ctx context.Context, host, failpoint, payload string) error {
failpointUrl := url.URL{ failpointUrl := url.URL{
Scheme: "http", Scheme: "http",
Host: host, Host: host,
Path: failpoint, Path: failpoint,
} }
r, err := http.NewRequest("PUT", failpointUrl.String(), bytes.NewBuffer([]byte(payload))) r, err := http.NewRequestWithContext(ctx, "PUT", failpointUrl.String(), bytes.NewBuffer([]byte(payload)))
if err != nil { if err != nil {
return err return err
} }
@ -167,7 +186,7 @@ func setupGoFailpoint(host, failpoint, payload string) error {
return nil return nil
} }
func (f goFailpoint) Name() string { func (f goPanicFailpoint) Name() string {
return f.failpoint return f.failpoint
} }