Inject sleep during etcd bootstrap to reproduce https://github.com/etcd-io/etcd/issues/16666

Signed-off-by: Marek Siarkowicz <siarkowicz@google.com>
This commit is contained in:
Marek Siarkowicz 2023-10-04 22:55:35 +02:00
parent 185ddb2e78
commit 05a77032fc
2 changed files with 59 additions and 3 deletions

View File

@ -17,6 +17,7 @@ package e2e
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"net/http"
@ -338,7 +339,15 @@ type BinaryFailpoints struct {
availableCache map[string]struct{}
}
func (f *BinaryFailpoints) Setup(ctx context.Context, failpoint, payload string) error {
func (f *BinaryFailpoints) SetupEnv(failpoint, payload string) error {
if f.member.IsRunning() {
return errors.New("cannot setup environment variable while process is running")
}
f.member.Config().EnvVars["GOFAIL_FAILPOINTS"] = fmt.Sprintf("%s=%s", failpoint, payload)
return nil
}
func (f *BinaryFailpoints) SetupHTTP(ctx context.Context, failpoint, payload string) error {
host := fmt.Sprintf("127.0.0.1:%d", f.member.Config().GoFailPort)
failpointUrl := url.URL{
Scheme: "http",

View File

@ -66,6 +66,7 @@ var (
RaftAfterWALReleasePanic Failpoint = goPanicFailpoint{"raftAfterWALRelease", triggerBlackhole{waitTillSnapshot: true}, Follower}
RaftBeforeSaveSnapPanic Failpoint = goPanicFailpoint{"raftBeforeSaveSnap", triggerBlackhole{waitTillSnapshot: true}, Follower}
RaftAfterSaveSnapPanic Failpoint = goPanicFailpoint{"raftAfterSaveSnap", triggerBlackhole{waitTillSnapshot: true}, Follower}
beforeApplyOneConfChangeSleep Failpoint = killAndGofailSleep{"beforeApplyOneConfChange", time.Second}
allFailpoints = []Failpoint{
KillFailpoint, BeforeCommitPanic, AfterCommitPanic, RaftBeforeSavePanic, RaftAfterSavePanic,
DefragBeforeCopyPanic, DefragBeforeRenamePanic, BackendBeforePreCommitHookPanic, BackendAfterPreCommitHookPanic,
@ -75,6 +76,7 @@ var (
CompactAfterCommitBatchPanic, RaftBeforeLeaderSendPanic, BlackholePeerNetwork, DelayPeerNetwork,
RaftBeforeFollowerSendPanic, RaftBeforeApplySnapPanic, RaftAfterApplySnapPanic, RaftAfterWALReleasePanic,
RaftBeforeSaveSnapPanic, RaftAfterSaveSnapPanic, BlackholeUntilSnapshot,
beforeApplyOneConfChangeSleep,
}
)
@ -252,7 +254,7 @@ func (f goPanicFailpoint) Inject(ctx context.Context, t *testing.T, lg *zap.Logg
default:
}
lg.Info("Setting up gofailpoint", zap.String("failpoint", f.Name()))
err := member.Failpoints().Setup(ctx, f.failpoint, "panic")
err := member.Failpoints().SetupHTTP(ctx, f.failpoint, "panic")
if err != nil {
lg.Info("goFailpoint setup failed", zap.String("failpoint", f.Name()), zap.Error(err))
continue
@ -318,7 +320,7 @@ func (f goPanicFailpoint) Available(config e2e.EtcdProcessClusterConfig, member
}
func (f goPanicFailpoint) Name() string {
return f.failpoint
return fmt.Sprintf("%s=panic()", f.failpoint)
}
type triggerDefrag struct{}
@ -527,3 +529,48 @@ func (f delayPeerNetworkFailpoint) Name() string {
func (f delayPeerNetworkFailpoint) Available(config e2e.EtcdProcessClusterConfig, clus e2e.EtcdProcess) bool {
return config.ClusterSize > 1 && clus.PeerProxy() != nil
}
type killAndGofailSleep struct {
failpoint string
time time.Duration
}
func (f killAndGofailSleep) Inject(ctx context.Context, t *testing.T, lg *zap.Logger, clus *e2e.EtcdProcessCluster) error {
member := clus.Procs[rand.Int()%len(clus.Procs)]
for member.IsRunning() {
err := member.Kill()
if err != nil {
lg.Info("Sending kill signal failed", zap.Error(err))
}
err = member.Wait(ctx)
if err != nil && !strings.Contains(err.Error(), "unexpected exit code") {
lg.Info("Failed to kill the process", zap.Error(err))
return fmt.Errorf("failed to kill the process within %s, err: %w", triggerTimeout, err)
}
}
lg.Info("Setting up goFailpoint", zap.String("failpoint", f.Name()))
err := member.Failpoints().SetupEnv(f.failpoint, fmt.Sprintf(`sleep(%q)`, f.time))
if err != nil {
return err
}
err = member.Start(ctx)
if err != nil {
return err
}
// TODO: Check gofail status (https://github.com/etcd-io/gofail/pull/47) and wait for sleep to beis executed at least once.
return nil
}
func (f killAndGofailSleep) Name() string {
return fmt.Sprintf("%s=sleep(%s)", f.failpoint, f.time)
}
func (f killAndGofailSleep) Available(config e2e.EtcdProcessClusterConfig, member e2e.EtcdProcess) bool {
memberFailpoints := member.Failpoints()
if memberFailpoints == nil {
return false
}
available := memberFailpoints.Available()
_, found := available[f.failpoint]
return found
}