// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"fmt"
	"log"
	"sync"
	"time"

	"github.com/coreos/etcd/Godeps/_workspace/src/golang.org/x/net/context"
	"github.com/coreos/etcd/Godeps/_workspace/src/google.golang.org/grpc"

	pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
)

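// tester runs the configured failure cases against the test cluster, round
// after round, and records its progress in status.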
type tester struct {
	failures []failure
	cluster  *cluster
	limit    int

	status Status
}

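// runLoop executes up to limit rounds. In each round it walks through every
// failure case: wait for full cluster health, inject the failure, recover
// from it, and (for v3 clusters) cancel the stressers and verify that all
// members report the same KV hash before restarting the stressers.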
func (tt *tester) runLoop() {
	tt.status.Since = time.Now()
	tt.status.RoundLimit = tt.limit
	tt.status.cluster = tt.cluster
	for _, f := range tt.failures {
		tt.status.Failures = append(tt.status.Failures, f.Desc())
	}
	for i := 0; i < tt.limit; i++ {
		tt.status.setRound(i)

		for j, f := range tt.failures {
			tt.status.setCase(j)

			if err := tt.cluster.WaitHealth(); err != nil {
				log.Printf("etcd-tester: [round#%d case#%d] wait full health error: %v", i, j, err)
				if err := tt.cleanup(i, j); err != nil {
					log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
					return
				}
				continue
			}
			log.Printf("etcd-tester: [round#%d case#%d] start failure %s", i, j, f.Desc())
			log.Printf("etcd-tester: [round#%d case#%d] start injecting failure...", i, j)
			if err := f.Inject(tt.cluster, i); err != nil {
				log.Printf("etcd-tester: [round#%d case#%d] injection error: %v", i, j, err)
				if err := tt.cleanup(i, j); err != nil {
					log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
					return
				}
				continue
			}
			log.Printf("etcd-tester: [round#%d case#%d] start recovering failure...", i, j)
			if err := f.Recover(tt.cluster, i); err != nil {
				log.Printf("etcd-tester: [round#%d case#%d] recovery error: %v", i, j, err)
				if err := tt.cleanup(i, j); err != nil {
					log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
					return
				}
				continue
			}

			if tt.cluster.v2Only {
				log.Printf("etcd-tester: [round#%d case#%d] succeed!", i, j)
				continue
			}

			log.Printf("etcd-tester: [round#%d case#%d] canceling the stressers...", i, j)
			for _, s := range tt.cluster.Stressers {
				s.Cancel()
			}

			log.Printf("etcd-tester: [round#%d case#%d] waiting 5s for pending PUTs to be committed across cluster...", i, j)
			time.Sleep(5 * time.Second)

			log.Printf("etcd-tester: [round#%d case#%d] starting checking consistency...", i, j)
			err := tt.cluster.checkConsistency()
			if err != nil {
				log.Printf("etcd-tester: [round#%d case#%d] checkConsistency error (%v)", i, j, err)
				if err := tt.cleanup(i, j); err != nil {
					log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
					return
				}
			} else {
				log.Printf("etcd-tester: [round#%d case#%d] all members are consistent!", i, j)
				log.Printf("etcd-tester: [round#%d case#%d] succeed!", i, j)
			}

			log.Printf("etcd-tester: [round#%d case#%d] restarting the stressers...", i, j)
			for _, s := range tt.cluster.Stressers {
				go s.Stress()
			}
		}
	}
}

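// cleanup tears the cluster down and bootstraps a fresh one so the next case
// starts from a clean state.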
func (tt *tester) cleanup(i, j int) error {
	log.Printf("etcd-tester: [round#%d case#%d] cleaning up...", i, j)
	if err := tt.cluster.Cleanup(); err != nil {
		return err
	}
	return tt.cluster.Bootstrap()
}

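// Status reports the tester's progress: when it started, the configured
// failure cases, the round limit, and the current round and case.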
type Status struct {
	Since      time.Time
	Failures   []string
	RoundLimit int

	Cluster ClusterStatus
	cluster *cluster

	mu    sync.Mutex // guards Round and Case
	Round int
	Case  int
}

// get gets a copy of status
func (s *Status) get() Status {
	s.mu.Lock()
	got := *s
	cluster := s.cluster
	s.mu.Unlock()
	got.Cluster = cluster.Status()
	return got
}

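// setRound records the current round number under the status mutex.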
func (s *Status) setRound(r int) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.Round = r
}

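// setCase records the current case index under the status mutex.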
func (s *Status) setCase(c int) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.Case = c
}

// checkConsistency stops the cluster for a moment and gets the hashes of the
// KV storages from all members.
func (c *cluster) checkConsistency() error {
	hashes := make(map[string]uint32)
	for _, u := range c.GRPCURLs {
		conn, err := grpc.Dial(u, grpc.WithInsecure(), grpc.WithTimeout(5*time.Second))
		if err != nil {
			return err
		}
		kvc := pb.NewKVClient(conn)

		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		resp, err := kvc.Hash(ctx, &pb.HashRequest{})
		cancel()
		conn.Close()
		// Check the RPC error before touching the response; dereferencing
		// resp after a failed call would panic.
		if err != nil {
			return err
		}

		hashes[u] = resp.Hash
	}

	if !checkConsistency(hashes) {
		return fmt.Errorf("check consistency fails: %v", hashes)
	}
	return nil
}

// checkConsistency returns true if all nodes have the same KV hash values.
func checkConsistency(hashes map[string]uint32) bool {
	var cv uint32
	isConsistent := true
	first := true
	for _, v := range hashes {
		// Track the first hash explicitly instead of using 0 as a sentinel,
		// so a legitimate hash value of 0 is still compared.
		if first {
			cv, first = v, false
		}
		if cv != v {
			isConsistent = false
		}
	}
	return isConsistent
}