// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package balancer

import (
	"context"
	"strings"
	"testing"

	"go.etcd.io/etcd/clientv3/balancer/picker"
	"go.etcd.io/etcd/clientv3/balancer/resolver/endpoint"
	pb "go.etcd.io/etcd/etcdserver/etcdserverpb"
	"go.etcd.io/etcd/pkg/mock/mockserver"

	"go.uber.org/zap"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/peer"
	"google.golang.org/grpc/status"
)

// TestRoundRobinBalancedResolvableNoFailover ensures that requests to
// a resolvable endpoint are balanced across multiple nodes, if any,
// and that no failover occurs.
func TestRoundRobinBalancedResolvableNoFailover(t *testing.T) {
	testCases := []struct {
		name        string
		serverCount int
		reqN        int
		network     string
	}{
		{name: "rrBalanced_1", serverCount: 1, reqN: 5, network: "tcp"},
		{name: "rrBalanced_1_unix_sockets", serverCount: 1, reqN: 5, network: "unix"},
		{name: "rrBalanced_3", serverCount: 3, reqN: 7, network: "tcp"},
		{name: "rrBalanced_5", serverCount: 5, reqN: 10, network: "tcp"},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			ms, err := mockserver.StartMockServersOnNetwork(tc.serverCount, tc.network)
			if err != nil {
				t.Fatalf("failed to start mock servers: %v", err)
			}
			defer ms.Stop()

			var eps []string
			for _, svr := range ms.Servers {
				eps = append(eps, svr.ResolverAddress().Addr)
			}

			rsv, err := endpoint.NewResolverGroup("nofailover")
			if err != nil {
				t.Fatal(err)
			}
			defer rsv.Close()
			rsv.SetEndpoints(eps)

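			// register the balancer builder under a test-specific name from
			// genName() so it does not clash with builders registered by the
			// other tests in this package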
			name := genName()
			cfg := Config{
				Policy: picker.RoundrobinBalanced,
				Name:   name,
				Logger: zap.NewExample(),
			}
			RegisterBuilder(cfg)
			conn, err := grpc.Dial("endpoint://nofailover/*", grpc.WithInsecure(), grpc.WithBalancerName(name))
			if err != nil {
				t.Fatalf("failed to dial mock server: %v", err)
			}
			defer conn.Close()
			cli := pb.NewKVClient(conn)

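			// reqFunc issues a Range RPC and records, via the grpc.Peer
			// call option, the address of the server that served it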
			reqFunc := func(ctx context.Context) (picked string, err error) {
				var p peer.Peer
				_, err = cli.Range(ctx, &pb.RangeRequest{Key: []byte("/x")}, grpc.Peer(&p))
				if p.Addr != nil {
					picked = p.Addr.String()
				}
				return picked, err
			}

			_, picked, err := warmupConnections(reqFunc, tc.serverCount, "")
			if err != nil {
				t.Fatalf("Unexpected failure %v", err)
			}

			// verify that we round robin
			prev, switches := picked, 0
			for i := 0; i < tc.reqN; i++ {
				picked, err = reqFunc(context.Background())
				if err != nil {
					t.Fatalf("#%d: unexpected failure %v", i, err)
				}
				if prev != picked {
					switches++
				}
				prev = picked
			}
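			// with more than one server, round robin moves to a different
			// server on every request, so the switch count must equal the
			// request count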
			if tc.serverCount > 1 && switches != tc.reqN {
				t.Fatalf("expected balanced loads for %d requests, got switches %d", tc.reqN, switches)
			}
		})
	}
}

// TestRoundRobinBalancedResolvableFailoverFromServerFail ensures that
// load is rebalanced when one server goes down and later comes back.
func TestRoundRobinBalancedResolvableFailoverFromServerFail(t *testing.T) {
	serverCount := 5
	ms, err := mockserver.StartMockServers(serverCount)
	if err != nil {
		t.Fatalf("failed to start mock servers: %s", err)
	}
	defer ms.Stop()
	var eps []string
	for _, svr := range ms.Servers {
		eps = append(eps, svr.ResolverAddress().Addr)
	}

	rsv, err := endpoint.NewResolverGroup("serverfail")
	if err != nil {
		t.Fatal(err)
	}
	defer rsv.Close()
	rsv.SetEndpoints(eps)

	name := genName()
	cfg := Config{
		Policy: picker.RoundrobinBalanced,
		Name:   name,
		Logger: zap.NewExample(),
	}
	RegisterBuilder(cfg)
	conn, err := grpc.Dial("endpoint://serverfail/mock.server", grpc.WithInsecure(), grpc.WithBalancerName(name))
	if err != nil {
		t.Fatalf("failed to dial mock server: %s", err)
	}
	defer conn.Close()
	cli := pb.NewKVClient(conn)

	reqFunc := func(ctx context.Context) (picked string, err error) {
		var p peer.Peer
		_, err = cli.Range(ctx, &pb.RangeRequest{Key: []byte("/x")}, grpc.Peer(&p))
		if p.Addr != nil {
			picked = p.Addr.String()
		}
		return picked, err
	}

	// stop first server, loads should be redistributed
	ms.StopAt(0)
	// the stopped server will transition into TRANSIENT_FAILURE state, but
	// the transition is not instantaneous and the server can still be picked
	// for a short period of time; ignore "transport is closing" errors that
	// surface in that window
	available, picked, err := warmupConnections(reqFunc, serverCount-1, "transport is closing")
	if err != nil {
		t.Fatalf("Unexpected failure %v", err)
	}

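	// the stopped server is excluded from 'available'; every pick must land
	// on a live server and round robin must keep rotating on every request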
	reqN := 10
	prev, switches := picked, 0
	for i := 0; i < reqN; i++ {
		picked, err = reqFunc(context.Background())
		if err != nil {
			t.Fatalf("#%d: unexpected failure %v", i, err)
		}
		if _, ok := available[picked]; !ok {
			t.Fatalf("picked unavailable address %q (available %v)", picked, available)
		}
		if prev != picked {
			switches++
		}
		prev = picked
	}
	if switches != reqN {
		t.Fatalf("expected balanced loads for %d requests, got switches %d", reqN, switches)
	}

	// now failed server comes back
	ms.StartAt(0)
	available, picked, err = warmupConnections(reqFunc, serverCount, "")
	if err != nil {
		t.Fatalf("Unexpected failure %v", err)
	}

	prev, switches = picked, 0
	recoveredAddr, recovered := eps[0], 0
	available[recoveredAddr] = struct{}{}

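	// with all serverCount servers back in rotation, 2*reqN round-robin
	// requests should hand the recovered server exactly 2*reqN/serverCount
	// of the load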
	for i := 0; i < 2*reqN; i++ {
		picked, err := reqFunc(context.Background())
		if err != nil {
			t.Fatalf("#%d: unexpected failure %v", i, err)
		}
		if _, ok := available[picked]; !ok {
			t.Fatalf("#%d: picked unavailable address %q (available %v)", i, picked, available)
		}
		if prev != picked {
			switches++
		}
		if picked == recoveredAddr {
			recovered++
		}
		prev = picked
	}
	if switches != 2*reqN {
		t.Fatalf("expected balanced loads for %d requests, got switches %d", 2*reqN, switches)
	}
	if recovered != 2*reqN/serverCount {
		t.Fatalf("recovered server %q got only %d requests", recoveredAddr, recovered)
	}
}

// TestRoundRobinBalancedResolvableFailoverFromRequestFail ensures that
// load is rebalanced while some requests fail.
func TestRoundRobinBalancedResolvableFailoverFromRequestFail(t *testing.T) {
	serverCount := 5
	ms, err := mockserver.StartMockServers(serverCount)
	if err != nil {
		t.Fatalf("failed to start mock servers: %s", err)
	}
	defer ms.Stop()
	var eps []string
	for _, svr := range ms.Servers {
		eps = append(eps, svr.ResolverAddress().Addr)
	}

	rsv, err := endpoint.NewResolverGroup("requestfail")
	if err != nil {
		t.Fatal(err)
	}
	defer rsv.Close()
	rsv.SetEndpoints(eps)

	name := genName()
	cfg := Config{
		Policy: picker.RoundrobinBalanced,
		Name:   name,
		Logger: zap.NewExample(),
	}
	RegisterBuilder(cfg)
	conn, err := grpc.Dial("endpoint://requestfail/mock.server", grpc.WithInsecure(), grpc.WithBalancerName(name))
	if err != nil {
		t.Fatalf("failed to dial mock server: %s", err)
	}
	defer conn.Close()
	cli := pb.NewKVClient(conn)

	reqFunc := func(ctx context.Context) (picked string, err error) {
		var p peer.Peer
		_, err = cli.Range(ctx, &pb.RangeRequest{Key: []byte("/x")}, grpc.Peer(&p))
		if p.Addr != nil {
			picked = p.Addr.String()
		}
		return picked, err
	}

	available, picked, err := warmupConnections(reqFunc, serverCount, "")
	if err != nil {
		t.Fatalf("Unexpected failure %v", err)
	}

	reqN := 20
	prev, switches := "", 0
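	// cancel every other request before it is sent; canceled requests must
	// fail with codes.Canceled and are excluded from the switch count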
	for i := 0; i < reqN; i++ {
		ctx, cancel := context.WithCancel(context.Background())
		defer cancel()
		if i%2 == 0 {
			cancel()
		}
		picked, err = reqFunc(ctx)
		if i%2 == 0 {
			if s, ok := status.FromError(err); ok && s.Code() != codes.Canceled {
				t.Fatalf("#%d: expected %v, got %v", i, context.Canceled, err)
			}
			continue
		}
		if _, ok := available[picked]; !ok {
			t.Fatalf("#%d: picked unavailable address %q (available %v)", i, picked, available)
		}
		if prev != picked {
			switches++
		}
		prev = picked
	}
	if switches != reqN/2 {
		t.Fatalf("expected balanced loads for %d requests, got switches %d", reqN, switches)
	}
}

type reqFuncT = func(ctx context.Context) (picked string, err error)

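// warmupConnections repeatedly issues requests until every one of the
// serverCount expected servers has been picked at least once. It returns
// the set of addresses seen, the last picked address, and the first error
// that was not matched by ignoreErr.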
func warmupConnections(reqFunc reqFuncT, serverCount int, ignoreErr string) (map[string]struct{}, string, error) {
	var picked string
	var err error
	available := make(map[string]struct{})
	// cycle through all peers to indirectly verify that the balancer's
	// subconn list is fully loaded; otherwise we can't reliably count
	// switches between 'picked' peers in the test assert phase
	for len(available) < serverCount {
		picked, err = reqFunc(context.Background())
		if err != nil {
			if ignoreErr != "" && strings.Contains(err.Error(), ignoreErr) {
				// skip ignored errors
				continue
			}
			return available, picked, err
		}
		available[picked] = struct{}{}
	}
	return available, picked, err
}