mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
Merge pull request #17152 from serathius/logs-recover-backend
Improve logs around recovering snapshot backend and add an e2e test
This commit is contained in:
@@ -227,6 +227,14 @@ func bootstrapBackend(cfg config.ServerConfig, haveWAL bool, st v2store.Store, s
|
||||
}
|
||||
}
|
||||
if beExist {
|
||||
s1, s2 := be.Size(), be.SizeInUse()
|
||||
cfg.Logger.Info(
|
||||
"recovered v3 backend",
|
||||
zap.Int64("backend-size-bytes", s1),
|
||||
zap.String("backend-size", humanize.Bytes(uint64(s1))),
|
||||
zap.Int64("backend-size-in-use-bytes", s2),
|
||||
zap.String("backend-size-in-use", humanize.Bytes(uint64(s2))),
|
||||
)
|
||||
if err = schema.Validate(cfg.Logger, be.ReadTx()); err != nil {
|
||||
cfg.Logger.Error("Failed to validate schema", zap.Error(err))
|
||||
return nil, err
|
||||
@@ -414,14 +422,6 @@ func recoverSnapshot(cfg config.ServerConfig, st v2store.Store, be backend.Backe
|
||||
// already been closed in this case, so we should set the backend again.
|
||||
ci.SetBackend(be)
|
||||
|
||||
s1, s2 := be.Size(), be.SizeInUse()
|
||||
cfg.Logger.Info(
|
||||
"recovered v3 backend from snapshot",
|
||||
zap.Int64("backend-size-bytes", s1),
|
||||
zap.String("backend-size", humanize.Bytes(uint64(s1))),
|
||||
zap.Int64("backend-size-in-use-bytes", s2),
|
||||
zap.String("backend-size-in-use", humanize.Bytes(uint64(s2))),
|
||||
)
|
||||
if beExist {
|
||||
// TODO: remove kvindex != 0 checking when we do not expect users to upgrade
|
||||
// etcd from pre-3.0 release.
|
||||
|
||||
@@ -1001,6 +1001,7 @@ func (s *EtcdServer) applySnapshot(ep *etcdProgress, toApply *toApply) {
|
||||
// wait for raftNode to persist snapshot onto the disk
|
||||
<-toApply.notifyc
|
||||
|
||||
// gofail: var applyBeforeOpenSnapshot struct{}
|
||||
newbe, err := serverstorage.OpenSnapshotBackend(s.Cfg, s.snapshotter, toApply.snapshot, s.beHooks)
|
||||
if err != nil {
|
||||
lg.Panic("failed to open snapshot backend", zap.Error(err))
|
||||
|
||||
@@ -105,8 +105,10 @@ func RecoverSnapshotBackend(cfg config.ServerConfig, oldbe backend.Backend, snap
|
||||
consistentIndex, _ = schema.ReadConsistentIndex(oldbe.ReadTx())
|
||||
}
|
||||
if snapshot.Metadata.Index <= consistentIndex {
|
||||
cfg.Logger.Info("Skipping snapshot backend", zap.Uint64("consistent-index", consistentIndex), zap.Uint64("snapshot-index", snapshot.Metadata.Index))
|
||||
return oldbe, nil
|
||||
}
|
||||
cfg.Logger.Info("Recovering from snapshot backend", zap.Uint64("consistent-index", consistentIndex), zap.Uint64("snapshot-index", snapshot.Metadata.Index))
|
||||
oldbe.Close()
|
||||
return OpenSnapshotBackend(cfg, snap.New(cfg.Logger, cfg.SnapDir()), snapshot, hooks)
|
||||
}
|
||||
|
||||
98
tests/e2e/leader_snapshot_no_proxy_test.go
Normal file
98
tests/e2e/leader_snapshot_no_proxy_test.go
Normal file
@@ -0,0 +1,98 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//go:build !cluster_proxy
|
||||
|
||||
package e2e
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.uber.org/zap"
|
||||
|
||||
clientv3 "go.etcd.io/etcd/client/v3"
|
||||
"go.etcd.io/etcd/pkg/v3/expect"
|
||||
"go.etcd.io/etcd/tests/v3/framework/config"
|
||||
"go.etcd.io/etcd/tests/v3/framework/e2e"
|
||||
"go.etcd.io/etcd/tests/v3/robustness/failpoint"
|
||||
)
|
||||
|
||||
func TestRecoverSnapshotBackend(t *testing.T) {
|
||||
e2e.BeforeTest(t)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
epc, err := e2e.NewEtcdProcessCluster(ctx, t,
|
||||
e2e.WithClusterSize(3),
|
||||
e2e.WithKeepDataDir(true),
|
||||
e2e.WithPeerProxy(true),
|
||||
e2e.WithSnapshotCatchUpEntries(50),
|
||||
e2e.WithSnapshotCount(50),
|
||||
e2e.WithGoFailEnabled(true),
|
||||
e2e.WithIsPeerTLS(true),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
defer epc.Close()
|
||||
|
||||
blackholedMember := epc.Procs[0]
|
||||
otherMember := epc.Procs[1]
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
|
||||
trafficCtx, trafficCancel := context.WithCancel(ctx)
|
||||
c, err := clientv3.New(clientv3.Config{
|
||||
Endpoints: otherMember.EndpointsGRPC(),
|
||||
Logger: zap.NewNop(),
|
||||
DialKeepAliveTime: 10 * time.Second,
|
||||
DialKeepAliveTimeout: 100 * time.Millisecond,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
defer c.Close()
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for {
|
||||
select {
|
||||
case <-trafficCtx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
putCtx, putCancel := context.WithTimeout(trafficCtx, 50*time.Millisecond)
|
||||
c.Put(putCtx, "a", "b")
|
||||
putCancel()
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
}()
|
||||
|
||||
err = blackholedMember.Failpoints().SetupHTTP(ctx, "applyBeforeOpenSnapshot", "panic")
|
||||
require.NoError(t, err)
|
||||
err = failpoint.Blackhole(ctx, t, blackholedMember, epc, true)
|
||||
require.NoError(t, err)
|
||||
err = blackholedMember.Wait(ctx)
|
||||
require.NoError(t, err)
|
||||
trafficCancel()
|
||||
wg.Wait()
|
||||
err = blackholedMember.Start(ctx)
|
||||
require.NoError(t, err)
|
||||
_, err = blackholedMember.Logs().ExpectWithContext(ctx, expect.ExpectedResponse{Value: "Recovering from snapshot backend"})
|
||||
assert.NoError(t, err)
|
||||
err = blackholedMember.Etcdctl().Put(ctx, "a", "1", config.PutOptions{})
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
@@ -50,6 +50,7 @@ var (
|
||||
DropPeerNetwork,
|
||||
RaftBeforeSaveSleep,
|
||||
RaftAfterSaveSleep,
|
||||
ApplyBeforeOpenSnapshot,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@@ -53,6 +53,7 @@ var (
|
||||
RaftAfterWALReleasePanic Failpoint = goPanicFailpoint{"raftAfterWALRelease", triggerBlackhole{waitTillSnapshot: true}, Follower}
|
||||
RaftBeforeSaveSnapPanic Failpoint = goPanicFailpoint{"raftBeforeSaveSnap", triggerBlackhole{waitTillSnapshot: true}, Follower}
|
||||
RaftAfterSaveSnapPanic Failpoint = goPanicFailpoint{"raftAfterSaveSnap", triggerBlackhole{waitTillSnapshot: true}, Follower}
|
||||
ApplyBeforeOpenSnapshot Failpoint = goPanicFailpoint{"applyBeforeOpenSnapshot", triggerBlackhole{waitTillSnapshot: true}, Follower}
|
||||
BeforeApplyOneConfChangeSleep Failpoint = killAndGofailSleep{"beforeApplyOneConfChange", time.Second}
|
||||
RaftBeforeSaveSleep Failpoint = gofailSleepAndDeactivate{"raftBeforeSave", time.Second}
|
||||
RaftAfterSaveSleep Failpoint = gofailSleepAndDeactivate{"raftAfterSave", time.Second}
|
||||
|
||||
@@ -51,7 +51,7 @@ type triggerBlackhole struct {
|
||||
}
|
||||
|
||||
func (tb triggerBlackhole) Trigger(ctx context.Context, t *testing.T, member e2e.EtcdProcess, clus *e2e.EtcdProcessCluster) error {
|
||||
return blackhole(ctx, t, member, clus, tb.waitTillSnapshot)
|
||||
return Blackhole(ctx, t, member, clus, tb.waitTillSnapshot)
|
||||
}
|
||||
|
||||
func (tb triggerBlackhole) Available(config e2e.EtcdProcessClusterConfig, process e2e.EtcdProcess) bool {
|
||||
@@ -61,7 +61,7 @@ func (tb triggerBlackhole) Available(config e2e.EtcdProcessClusterConfig, proces
|
||||
return config.ClusterSize > 1 && process.PeerProxy() != nil
|
||||
}
|
||||
|
||||
func blackhole(ctx context.Context, t *testing.T, member e2e.EtcdProcess, clus *e2e.EtcdProcessCluster, shouldWaitTillSnapshot bool) error {
|
||||
func Blackhole(ctx context.Context, t *testing.T, member e2e.EtcdProcess, clus *e2e.EtcdProcessCluster, shouldWaitTillSnapshot bool) error {
|
||||
proxy := member.PeerProxy()
|
||||
|
||||
// Blackholing will cause peers to not be able to use streamWriters registered with member
|
||||
|
||||
Reference in New Issue
Block a user