mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
Add an e2e test and robustness failpoint around recovering from snapshot backend
Signed-off-by: Marek Siarkowicz <siarkowicz@google.com>
This commit is contained in:
parent
d39d86a214
commit
3471ef133d
@ -1001,7 +1001,7 @@ func (s *EtcdServer) applySnapshot(ep *etcdProgress, toApply *toApply) {
|
||||
// wait for raftNode to persist snapshot onto the disk
|
||||
<-toApply.notifyc
|
||||
|
||||
// gofail: var beforeOpenSnapshotBackend struct{}
|
||||
// gofail: var applyBeforeOpenSnapshot struct{}
|
||||
newbe, err := serverstorage.OpenSnapshotBackend(s.Cfg, s.snapshotter, toApply.snapshot, s.beHooks)
|
||||
if err != nil {
|
||||
lg.Panic("failed to open snapshot backend", zap.Error(err))
|
||||
|
98
tests/e2e/leader_snapshot_no_proxy_test.go
Normal file
98
tests/e2e/leader_snapshot_no_proxy_test.go
Normal file
@ -0,0 +1,98 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//go:build !cluster_proxy
|
||||
|
||||
package e2e
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.uber.org/zap"
|
||||
|
||||
clientv3 "go.etcd.io/etcd/client/v3"
|
||||
"go.etcd.io/etcd/pkg/v3/expect"
|
||||
"go.etcd.io/etcd/tests/v3/framework/config"
|
||||
"go.etcd.io/etcd/tests/v3/framework/e2e"
|
||||
"go.etcd.io/etcd/tests/v3/robustness/failpoint"
|
||||
)
|
||||
|
||||
func TestRecoverSnapshotBackend(t *testing.T) {
|
||||
e2e.BeforeTest(t)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
epc, err := e2e.NewEtcdProcessCluster(ctx, t,
|
||||
e2e.WithClusterSize(3),
|
||||
e2e.WithKeepDataDir(true),
|
||||
e2e.WithPeerProxy(true),
|
||||
e2e.WithSnapshotCatchUpEntries(50),
|
||||
e2e.WithSnapshotCount(50),
|
||||
e2e.WithGoFailEnabled(true),
|
||||
e2e.WithIsPeerTLS(true),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
defer epc.Close()
|
||||
|
||||
blackholedMember := epc.Procs[0]
|
||||
otherMember := epc.Procs[1]
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
|
||||
trafficCtx, trafficCancel := context.WithCancel(ctx)
|
||||
c, err := clientv3.New(clientv3.Config{
|
||||
Endpoints: otherMember.EndpointsGRPC(),
|
||||
Logger: zap.NewNop(),
|
||||
DialKeepAliveTime: 10 * time.Second,
|
||||
DialKeepAliveTimeout: 100 * time.Millisecond,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
defer c.Close()
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for {
|
||||
select {
|
||||
case <-trafficCtx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
putCtx, putCancel := context.WithTimeout(trafficCtx, 50*time.Millisecond)
|
||||
c.Put(putCtx, "a", "b")
|
||||
putCancel()
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
}()
|
||||
|
||||
err = blackholedMember.Failpoints().SetupHTTP(ctx, "applyBeforeOpenSnapshot", "panic")
|
||||
require.NoError(t, err)
|
||||
err = failpoint.Blackhole(ctx, t, blackholedMember, epc, true)
|
||||
require.NoError(t, err)
|
||||
err = blackholedMember.Wait(ctx)
|
||||
require.NoError(t, err)
|
||||
trafficCancel()
|
||||
wg.Wait()
|
||||
err = blackholedMember.Start(ctx)
|
||||
require.NoError(t, err)
|
||||
_, err = blackholedMember.Logs().ExpectWithContext(ctx, expect.ExpectedResponse{Value: "Recovering from snapshot backend"})
|
||||
assert.NoError(t, err)
|
||||
err = blackholedMember.Etcdctl().Put(ctx, "a", "1", config.PutOptions{})
|
||||
assert.NoError(t, err)
|
||||
}
|
@ -50,6 +50,7 @@ var (
|
||||
DropPeerNetwork,
|
||||
RaftBeforeSaveSleep,
|
||||
RaftAfterSaveSleep,
|
||||
ApplyBeforeOpenSnapshot,
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -53,6 +53,7 @@ var (
|
||||
RaftAfterWALReleasePanic Failpoint = goPanicFailpoint{"raftAfterWALRelease", triggerBlackhole{waitTillSnapshot: true}, Follower}
|
||||
RaftBeforeSaveSnapPanic Failpoint = goPanicFailpoint{"raftBeforeSaveSnap", triggerBlackhole{waitTillSnapshot: true}, Follower}
|
||||
RaftAfterSaveSnapPanic Failpoint = goPanicFailpoint{"raftAfterSaveSnap", triggerBlackhole{waitTillSnapshot: true}, Follower}
|
||||
ApplyBeforeOpenSnapshot Failpoint = goPanicFailpoint{"applyBeforeOpenSnapshot", triggerBlackhole{waitTillSnapshot: true}, Follower}
|
||||
BeforeApplyOneConfChangeSleep Failpoint = killAndGofailSleep{"beforeApplyOneConfChange", time.Second}
|
||||
RaftBeforeSaveSleep Failpoint = gofailSleepAndDeactivate{"raftBeforeSave", time.Second}
|
||||
RaftAfterSaveSleep Failpoint = gofailSleepAndDeactivate{"raftAfterSave", time.Second}
|
||||
|
@ -51,7 +51,7 @@ type triggerBlackhole struct {
|
||||
}
|
||||
|
||||
func (tb triggerBlackhole) Trigger(ctx context.Context, t *testing.T, member e2e.EtcdProcess, clus *e2e.EtcdProcessCluster) error {
|
||||
return blackhole(ctx, t, member, clus, tb.waitTillSnapshot)
|
||||
return Blackhole(ctx, t, member, clus, tb.waitTillSnapshot)
|
||||
}
|
||||
|
||||
func (tb triggerBlackhole) Available(config e2e.EtcdProcessClusterConfig, process e2e.EtcdProcess) bool {
|
||||
@ -61,7 +61,7 @@ func (tb triggerBlackhole) Available(config e2e.EtcdProcessClusterConfig, proces
|
||||
return config.ClusterSize > 1 && process.PeerProxy() != nil
|
||||
}
|
||||
|
||||
func blackhole(ctx context.Context, t *testing.T, member e2e.EtcdProcess, clus *e2e.EtcdProcessCluster, shouldWaitTillSnapshot bool) error {
|
||||
func Blackhole(ctx context.Context, t *testing.T, member e2e.EtcdProcess, clus *e2e.EtcdProcessCluster, shouldWaitTillSnapshot bool) error {
|
||||
proxy := member.PeerProxy()
|
||||
|
||||
// Blackholing will cause peers to not be able to use streamWriters registered with member
|
||||
|
Loading…
x
Reference in New Issue
Block a user