mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
tests/e2e: add graceful shutdown test
Signed-off-by: Chao Chen <chaochn@amazon.com>
This commit is contained in:
parent
0a7dd2186b
commit
f31d0eafb9
@ -1202,8 +1202,8 @@ func (s *EtcdServer) MoveLeader(ctx context.Context, lead, transferee uint64) er
|
||||
return nil
|
||||
}
|
||||
|
||||
// TransferLeadership transfers the leader to the chosen transferee.
|
||||
func (s *EtcdServer) TransferLeadership() error {
|
||||
// TryTransferLeadershipOnShutdown transfers the leader to the chosen transferee. It is only used in server graceful shutdown.
|
||||
func (s *EtcdServer) TryTransferLeadershipOnShutdown() error {
|
||||
lg := s.Logger()
|
||||
if !s.isLeader() {
|
||||
lg.Info(
|
||||
@ -1253,7 +1253,7 @@ func (s *EtcdServer) HardStop() {
|
||||
// Do and Process cannot be called after Stop has been invoked.
|
||||
func (s *EtcdServer) Stop() {
|
||||
lg := s.Logger()
|
||||
if err := s.TransferLeadership(); err != nil {
|
||||
if err := s.TryTransferLeadershipOnShutdown(); err != nil {
|
||||
lg.Warn("leadership transfer failed", zap.String("local-member-id", s.MemberId().String()), zap.Error(err))
|
||||
}
|
||||
s.HardStop()
|
||||
|
116
tests/e2e/graceful_shutdown_test.go
Normal file
116
tests/e2e/graceful_shutdown_test.go
Normal file
@ -0,0 +1,116 @@
|
||||
// Copyright 2023 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package e2e
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"go.etcd.io/raft/v3"
|
||||
|
||||
"go.etcd.io/etcd/tests/v3/framework/config"
|
||||
"go.etcd.io/etcd/tests/v3/framework/e2e"
|
||||
"go.etcd.io/etcd/tests/v3/framework/interfaces"
|
||||
)
|
||||
|
||||
func TestGracefulShutdown(t *testing.T) {
|
||||
tcs := []struct {
|
||||
name string
|
||||
clusterSize int
|
||||
}{
|
||||
{
|
||||
name: "clusterSize3",
|
||||
clusterSize: 3,
|
||||
},
|
||||
{
|
||||
name: "clusterSize5",
|
||||
clusterSize: 5,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tcs {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
testRunner := e2e.NewE2eRunner()
|
||||
testRunner.BeforeTest(t)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
clus := testRunner.NewCluster(ctx, t, config.WithClusterSize(tc.clusterSize))
|
||||
// clean up orphaned resources like closing member client.
|
||||
defer clus.Close()
|
||||
// shutdown each etcd member process sequentially
|
||||
// and start from old leader, (new leader), (follower)
|
||||
tryShutdownLeader(ctx, t, clus.Members())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// tryShutdownLeader tries stop etcd member if it is leader.
|
||||
// it also asserts stop leader should not take longer than 1.5 seconds and leaderID has been changed within 500ms.
|
||||
func tryShutdownLeader(ctx context.Context, t *testing.T, members []interfaces.Member) {
|
||||
quorum := len(members)/2 + 1
|
||||
for len(members) > quorum {
|
||||
leader, leaderID, term, followers := getLeader(ctx, t, members)
|
||||
stopped := make(chan error, 1)
|
||||
go func() {
|
||||
// each etcd server will wait up to 1 seconds to close all idle connections in peer handler.
|
||||
start := time.Now()
|
||||
leader.Stop()
|
||||
took := time.Since(start)
|
||||
if took > 1500*time.Millisecond {
|
||||
stopped <- fmt.Errorf("leader stop took %v longer than 1.5 seconds", took)
|
||||
return
|
||||
}
|
||||
stopped <- nil
|
||||
}()
|
||||
|
||||
// etcd election timeout could range from 1s to 2s without explicit leadership transfer.
|
||||
// assert leader ID has been changed within 500ms
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
resps, err := followers[0].Client().Status(ctx)
|
||||
require.NoError(t, err)
|
||||
require.NotEqual(t, leaderID, raft.None)
|
||||
require.Equal(t, resps[0].RaftTerm, term+1)
|
||||
require.NotEqualf(t, resps[0].Leader, leaderID, "expect old leaderID %x changed to new leader ID %x", leaderID, resps[0].Leader)
|
||||
|
||||
err = <-stopped
|
||||
require.NoError(t, err)
|
||||
|
||||
members = followers
|
||||
}
|
||||
}
|
||||
|
||||
func getLeader(ctx context.Context, t *testing.T, members []interfaces.Member) (leader interfaces.Member, leaderID, term uint64, followers []interfaces.Member) {
|
||||
leaderIdx := -1
|
||||
for i, m := range members {
|
||||
mc := m.Client()
|
||||
sresps, err := mc.Status(ctx)
|
||||
require.NoError(t, err)
|
||||
if sresps[0].Leader == sresps[0].Header.MemberId {
|
||||
leaderIdx = i
|
||||
leaderID = sresps[0].Leader
|
||||
term = sresps[0].RaftTerm
|
||||
break
|
||||
}
|
||||
}
|
||||
if leaderIdx == -1 {
|
||||
return nil, 0, 0, members
|
||||
}
|
||||
leader = members[leaderIdx]
|
||||
return leader, leaderID, term, append(members[:leaderIdx], members[leaderIdx+1:]...)
|
||||
}
|
@ -58,7 +58,7 @@ func testMoveLeader(t *testing.T, auto bool) {
|
||||
|
||||
target := uint64(clus.Members[(oldLeadIdx+1)%3].Server.MemberId())
|
||||
if auto {
|
||||
err := clus.Members[oldLeadIdx].Server.TransferLeadership()
|
||||
err := clus.Members[oldLeadIdx].Server.TryTransferLeadershipOnShutdown()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -149,7 +149,7 @@ func TestMoveLeaderToLearnerError(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestTransferLeadershipWithLearner ensures TransferLeadership does not timeout due to learner is
|
||||
// TestTransferLeadershipWithLearner ensures TryTransferLeadershipOnShutdown does not time out due to a learner being
|
||||
// automatically picked by leader as transferee.
|
||||
func TestTransferLeadershipWithLearner(t *testing.T) {
|
||||
integration.BeforeTest(t)
|
||||
@ -170,9 +170,9 @@ func TestTransferLeadershipWithLearner(t *testing.T) {
|
||||
leaderIdx := clus.WaitLeader(t)
|
||||
errCh := make(chan error, 1)
|
||||
go func() {
|
||||
// note that this cluster has 1 leader and 1 learner. TransferLeadership should return nil.
|
||||
// note that this cluster has 1 leader and 1 learner. TryTransferLeadershipOnShutdown should return nil.
|
||||
// Leadership transfer is skipped in cluster with 1 voting member.
|
||||
errCh <- clus.Members[leaderIdx].Server.TransferLeadership()
|
||||
errCh <- clus.Members[leaderIdx].Server.TryTransferLeadershipOnShutdown()
|
||||
}()
|
||||
select {
|
||||
case err := <-errCh:
|
||||
|
Loading…
x
Reference in New Issue
Block a user