mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
functional: add "SIGQUIT_AND_REMOVE_ONE_FOLLOWER"
Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
This commit is contained in:
parent
86deeab77a
commit
9057253d8c
@ -128,6 +128,7 @@ tester-config:
|
|||||||
- SIGTERM_LEADER_UNTIL_TRIGGER_SNAPSHOT
|
- SIGTERM_LEADER_UNTIL_TRIGGER_SNAPSHOT
|
||||||
- SIGTERM_QUORUM
|
- SIGTERM_QUORUM
|
||||||
- SIGTERM_ALL
|
- SIGTERM_ALL
|
||||||
|
- SIGQUIT_AND_REMOVE_ONE_FOLLOWER
|
||||||
- BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER
|
- BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER
|
||||||
- BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT
|
- BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT
|
||||||
- BLACKHOLE_PEER_PORT_TX_RX_LEADER
|
- BLACKHOLE_PEER_PORT_TX_RX_LEADER
|
||||||
|
@ -161,6 +161,10 @@ func (clus *Cluster) updateFailures() {
|
|||||||
clus.failures = append(clus.failures,
|
clus.failures = append(clus.failures,
|
||||||
new_FailureCase_SIGTERM_ALL(clus))
|
new_FailureCase_SIGTERM_ALL(clus))
|
||||||
|
|
||||||
|
case "SIGQUIT_AND_REMOVE_ONE_FOLLOWER":
|
||||||
|
clus.failures = append(clus.failures,
|
||||||
|
new_FailureCase_SIGQUIT_AND_REMOVE_ONE_FOLLOWER(clus))
|
||||||
|
|
||||||
case "BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER":
|
case "BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER":
|
||||||
clus.failures = append(clus.failures,
|
clus.failures = append(clus.failures,
|
||||||
new_FailureCase_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER(clus))
|
new_FailureCase_BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER(clus))
|
||||||
@ -377,14 +381,12 @@ func (clus *Cluster) broadcast(op rpcpb.Operation) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (clus *Cluster) sendOp(idx int, op rpcpb.Operation) error {
|
func (clus *Cluster) sendOp(idx int, op rpcpb.Operation) error {
|
||||||
if op == rpcpb.Operation_INITIAL_START_ETCD {
|
// maintain the initial member object
|
||||||
clus.agentRequests[idx] = &rpcpb.Request{
|
// throughout the test time
|
||||||
Operation: op,
|
clus.agentRequests[idx] = &rpcpb.Request{
|
||||||
Member: clus.Members[idx],
|
Operation: op,
|
||||||
Tester: clus.Tester,
|
Member: clus.Members[idx],
|
||||||
}
|
Tester: clus.Tester,
|
||||||
} else {
|
|
||||||
clus.agentRequests[idx].Operation = op
|
|
||||||
}
|
}
|
||||||
|
|
||||||
err := clus.agentStreams[idx].Send(clus.agentRequests[idx])
|
err := clus.agentStreams[idx].Send(clus.agentRequests[idx])
|
||||||
|
@ -68,6 +68,13 @@ func read(lg *zap.Logger, fpath string) (*Cluster, error) {
|
|||||||
clus.Members[i].Etcd.WALDir = filepath.Join(mem.Etcd.DataDir, "member", "wal")
|
clus.Members[i].Etcd.WALDir = filepath.Join(mem.Etcd.DataDir, "member", "wal")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch mem.Etcd.InitialClusterState {
|
||||||
|
case "new":
|
||||||
|
case "existing":
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("'--initial-cluster-state' got %q", mem.Etcd.InitialClusterState)
|
||||||
|
}
|
||||||
|
|
||||||
if mem.Etcd.HeartbeatIntervalMs == 0 {
|
if mem.Etcd.HeartbeatIntervalMs == 0 {
|
||||||
return nil, fmt.Errorf("'--heartbeat-interval' cannot be 0 (got %+v)", mem.Etcd)
|
return nil, fmt.Errorf("'--heartbeat-interval' cannot be 0 (got %+v)", mem.Etcd)
|
||||||
}
|
}
|
||||||
|
@ -162,6 +162,7 @@ func Test_read(t *testing.T) {
|
|||||||
"SIGTERM_LEADER_UNTIL_TRIGGER_SNAPSHOT",
|
"SIGTERM_LEADER_UNTIL_TRIGGER_SNAPSHOT",
|
||||||
"SIGTERM_QUORUM",
|
"SIGTERM_QUORUM",
|
||||||
"SIGTERM_ALL",
|
"SIGTERM_ALL",
|
||||||
|
"SIGQUIT_AND_REMOVE_ONE_FOLLOWER",
|
||||||
"BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER",
|
"BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER",
|
||||||
"BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT",
|
"BLACKHOLE_PEER_PORT_TX_RX_ONE_FOLLOWER_UNTIL_TRIGGER_SNAPSHOT",
|
||||||
"BLACKHOLE_PEER_PORT_TX_RX_LEADER",
|
"BLACKHOLE_PEER_PORT_TX_RX_LEADER",
|
||||||
|
182
functional/tester/failure_case_sigquit_remove.go
Normal file
182
functional/tester/failure_case_sigquit_remove.go
Normal file
@ -0,0 +1,182 @@
|
|||||||
|
// Copyright 2018 The etcd Authors
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package tester
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/coreos/etcd/clientv3"
|
||||||
|
"github.com/coreos/etcd/functional/rpcpb"
|
||||||
|
"go.uber.org/zap"
|
||||||
|
)
|
||||||
|
|
||||||
|
func inject_SIGQUIT_ETCD_AND_REMOVE_DATA(clus *Cluster, idx1 int) error {
|
||||||
|
cli1, err := clus.Members[idx1].CreateEtcdClient()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer cli1.Close()
|
||||||
|
|
||||||
|
var mresp *clientv3.MemberListResponse
|
||||||
|
mresp, err = cli1.MemberList(context.Background())
|
||||||
|
mss := []string{}
|
||||||
|
if err == nil && mresp != nil {
|
||||||
|
mss = describeMembers(mresp)
|
||||||
|
}
|
||||||
|
clus.lg.Info(
|
||||||
|
"member list before disastrous machine failure",
|
||||||
|
zap.String("request-to", clus.Members[idx1].EtcdClientEndpoint),
|
||||||
|
zap.Strings("members", mss),
|
||||||
|
zap.Error(err),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
sresp, serr := cli1.Status(context.Background(), clus.Members[idx1].EtcdClientEndpoint)
|
||||||
|
if serr != nil {
|
||||||
|
return serr
|
||||||
|
}
|
||||||
|
id1 := sresp.Header.MemberId
|
||||||
|
is1 := fmt.Sprintf("%016x", id1)
|
||||||
|
|
||||||
|
err = clus.sendOp(idx1, rpcpb.Operation_SIGQUIT_ETCD_AND_REMOVE_DATA)
|
||||||
|
clus.lg.Info(
|
||||||
|
"disastrous machine failure",
|
||||||
|
zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
|
||||||
|
zap.String("target-member-id", is1),
|
||||||
|
zap.Error(err),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
time.Sleep(3 * time.Second)
|
||||||
|
|
||||||
|
idx2 := (idx1 + 1) % len(clus.Members)
|
||||||
|
var cli2 *clientv3.Client
|
||||||
|
cli2, err = clus.Members[idx2].CreateEtcdClient()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer cli2.Close()
|
||||||
|
|
||||||
|
_, err = cli2.MemberRemove(context.Background(), id1)
|
||||||
|
clus.lg.Info(
|
||||||
|
"member remove after disaster",
|
||||||
|
zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
|
||||||
|
zap.String("target-member-id", is1),
|
||||||
|
zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
|
||||||
|
zap.Error(err),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
time.Sleep(5 * time.Second)
|
||||||
|
|
||||||
|
mresp, err = cli2.MemberList(context.Background())
|
||||||
|
mss = []string{}
|
||||||
|
if err == nil && mresp != nil {
|
||||||
|
mss = describeMembers(mresp)
|
||||||
|
}
|
||||||
|
clus.lg.Info(
|
||||||
|
"member list after member remove",
|
||||||
|
zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
|
||||||
|
zap.Strings("members", mss),
|
||||||
|
zap.Error(err),
|
||||||
|
)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func recover_SIGQUIT_ETCD_AND_REMOVE_DATA(clus *Cluster, idx1 int) error {
|
||||||
|
idx2 := (idx1 + 1) % len(clus.Members)
|
||||||
|
cli2, err := clus.Members[idx2].CreateEtcdClient()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer cli2.Close()
|
||||||
|
|
||||||
|
_, err = cli2.MemberAdd(context.Background(), clus.Members[idx1].Etcd.AdvertisePeerURLs)
|
||||||
|
clus.lg.Info(
|
||||||
|
"member add before fresh restart",
|
||||||
|
zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
|
||||||
|
zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
|
||||||
|
zap.Error(err),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
time.Sleep(3 * time.Second)
|
||||||
|
|
||||||
|
clus.Members[idx1].Etcd.InitialClusterState = "existing"
|
||||||
|
err = clus.sendOp(idx1, rpcpb.Operation_RESTART_ETCD)
|
||||||
|
clus.lg.Info(
|
||||||
|
"fresh restart after member add",
|
||||||
|
zap.String("target-endpoint", clus.Members[idx1].EtcdClientEndpoint),
|
||||||
|
zap.Error(err),
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
time.Sleep(3 * time.Second)
|
||||||
|
|
||||||
|
var mresp *clientv3.MemberListResponse
|
||||||
|
mresp, err = cli2.MemberList(context.Background())
|
||||||
|
mss := []string{}
|
||||||
|
if err == nil && mresp != nil {
|
||||||
|
mss = describeMembers(mresp)
|
||||||
|
}
|
||||||
|
clus.lg.Info(
|
||||||
|
"member list after member add",
|
||||||
|
zap.String("request-to", clus.Members[idx2].EtcdClientEndpoint),
|
||||||
|
zap.Strings("members", mss),
|
||||||
|
zap.Error(err),
|
||||||
|
)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func new_FailureCase_SIGQUIT_AND_REMOVE_ONE_FOLLOWER(clus *Cluster) Failure {
|
||||||
|
ff := failureByFunc{
|
||||||
|
failureCase: rpcpb.FailureCase_SIGQUIT_AND_REMOVE_ONE_FOLLOWER,
|
||||||
|
injectMember: inject_SIGQUIT_ETCD_AND_REMOVE_DATA,
|
||||||
|
recoverMember: recover_SIGQUIT_ETCD_AND_REMOVE_DATA,
|
||||||
|
}
|
||||||
|
f := &failureFollower{ff, -1, -1}
|
||||||
|
return &failureDelay{
|
||||||
|
Failure: f,
|
||||||
|
delayDuration: clus.GetFailureDelayDuration(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func describeMembers(mresp *clientv3.MemberListResponse) (ss []string) {
|
||||||
|
ss = make([]string, len(mresp.Members))
|
||||||
|
for i, m := range mresp.Members {
|
||||||
|
ss[i] = fmt.Sprintf("Name %s / ID %016x / ClientURLs %s / PeerURLs %s",
|
||||||
|
m.Name,
|
||||||
|
m.ID,
|
||||||
|
strings.Join(m.ClientURLs, ","),
|
||||||
|
strings.Join(m.PeerURLs, ","),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
sort.Strings(ss)
|
||||||
|
return ss
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user