etcd/tests/e2e/graceful_shutdown_test.go
Wei Fu aa97484166 *: enable goimports in verify-lint
Signed-off-by: Wei Fu <fuweid89@gmail.com>
2023-09-21 21:14:09 +08:00

116 lines
3.5 KiB
Go

// Copyright 2023 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package e2e
import (
"context"
"fmt"
"testing"
"time"
"github.com/stretchr/testify/require"
"go.etcd.io/etcd/tests/v3/framework/config"
"go.etcd.io/etcd/tests/v3/framework/e2e"
"go.etcd.io/etcd/tests/v3/framework/interfaces"
"go.etcd.io/raft/v3"
)
func TestGracefulShutdown(t *testing.T) {
tcs := []struct {
name string
clusterSize int
}{
{
name: "clusterSize3",
clusterSize: 3,
},
{
name: "clusterSize5",
clusterSize: 5,
},
}
for _, tc := range tcs {
t.Run(tc.name, func(t *testing.T) {
testRunner := e2e.NewE2eRunner()
testRunner.BeforeTest(t)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
clus := testRunner.NewCluster(ctx, t, config.WithClusterSize(tc.clusterSize))
// clean up orphaned resources like closing member client.
defer clus.Close()
// shutdown each etcd member process sequentially
// and start from old leader, (new leader), (follower)
tryShutdownLeader(ctx, t, clus.Members())
})
}
}
// tryShutdownLeader tries stop etcd member if it is leader.
// it also asserts stop leader should not take longer than 1.5 seconds and leaderID has been changed within 500ms.
func tryShutdownLeader(ctx context.Context, t *testing.T, members []interfaces.Member) {
quorum := len(members)/2 + 1
for len(members) > quorum {
leader, leaderID, term, followers := getLeader(ctx, t, members)
stopped := make(chan error, 1)
go func() {
// each etcd server will wait up to 1 seconds to close all idle connections in peer handler.
start := time.Now()
leader.Stop()
took := time.Since(start)
if took > 1500*time.Millisecond {
stopped <- fmt.Errorf("leader stop took %v longer than 1.5 seconds", took)
return
}
stopped <- nil
}()
// etcd election timeout could range from 1s to 2s without explicit leadership transfer.
// assert leader ID has been changed within 500ms
time.Sleep(500 * time.Millisecond)
resps, err := followers[0].Client().Status(ctx)
require.NoError(t, err)
require.NotEqual(t, leaderID, raft.None)
require.Equal(t, resps[0].RaftTerm, term+1)
require.NotEqualf(t, resps[0].Leader, leaderID, "expect old leaderID %x changed to new leader ID %x", leaderID, resps[0].Leader)
err = <-stopped
require.NoError(t, err)
members = followers
}
}
func getLeader(ctx context.Context, t *testing.T, members []interfaces.Member) (leader interfaces.Member, leaderID, term uint64, followers []interfaces.Member) {
leaderIdx := -1
for i, m := range members {
mc := m.Client()
sresps, err := mc.Status(ctx)
require.NoError(t, err)
if sresps[0].Leader == sresps[0].Header.MemberId {
leaderIdx = i
leaderID = sresps[0].Leader
term = sresps[0].RaftTerm
break
}
}
if leaderIdx == -1 {
return nil, 0, 0, members
}
leader = members[leaderIdx]
return leader, leaderID, term, append(members[:leaderIdx], members[leaderIdx+1:]...)
}