mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
269 lines
7.6 KiB
Go
269 lines
7.6 KiB
Go
// Copyright 2015 CoreOS, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package main
|
|
|
|
import (
|
|
"log"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/coreos/etcd/Godeps/_workspace/src/golang.org/x/net/context"
|
|
"github.com/coreos/etcd/Godeps/_workspace/src/google.golang.org/grpc"
|
|
|
|
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
|
|
)
|
|
|
|
type tester struct {
|
|
failures []failure
|
|
cluster *cluster
|
|
limit int
|
|
|
|
status Status
|
|
}
|
|
|
|
func (tt *tester) runLoop() {
|
|
tt.status.Since = time.Now()
|
|
tt.status.RoundLimit = tt.limit
|
|
tt.status.cluster = tt.cluster
|
|
for _, f := range tt.failures {
|
|
tt.status.Failures = append(tt.status.Failures, f.Desc())
|
|
}
|
|
for i := 0; i < tt.limit; i++ {
|
|
tt.status.setRound(i)
|
|
|
|
var currentRevision int64
|
|
for j, f := range tt.failures {
|
|
tt.status.setCase(j)
|
|
|
|
if err := tt.cluster.WaitHealth(); err != nil {
|
|
log.Printf("etcd-tester: [round#%d case#%d] wait full health error: %v", i, j, err)
|
|
if err := tt.cleanup(i, j); err != nil {
|
|
log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
|
|
return
|
|
}
|
|
continue
|
|
}
|
|
log.Printf("etcd-tester: [round#%d case#%d] start failure %s", i, j, f.Desc())
|
|
|
|
log.Printf("etcd-tester: [round#%d case#%d] start injecting failure...", i, j)
|
|
if err := f.Inject(tt.cluster, i); err != nil {
|
|
log.Printf("etcd-tester: [round#%d case#%d] injection error: %v", i, j, err)
|
|
if err := tt.cleanup(i, j); err != nil {
|
|
log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
|
|
return
|
|
}
|
|
continue
|
|
}
|
|
log.Printf("etcd-tester: [round#%d case#%d] injected failure", i, j)
|
|
|
|
log.Printf("etcd-tester: [round#%d case#%d] start recovering failure...", i, j)
|
|
if err := f.Recover(tt.cluster, i); err != nil {
|
|
log.Printf("etcd-tester: [round#%d case#%d] recovery error: %v", i, j, err)
|
|
if err := tt.cleanup(i, j); err != nil {
|
|
log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
|
|
return
|
|
}
|
|
continue
|
|
}
|
|
log.Printf("etcd-tester: [round#%d case#%d] recovered failure", i, j)
|
|
|
|
if tt.cluster.v2Only {
|
|
log.Printf("etcd-tester: [round#%d case#%d] succeed!", i, j)
|
|
continue
|
|
}
|
|
|
|
log.Printf("etcd-tester: [round#%d case#%d] canceling the stressers...", i, j)
|
|
for _, s := range tt.cluster.Stressers {
|
|
s.Cancel()
|
|
}
|
|
log.Printf("etcd-tester: [round#%d case#%d] canceled stressers", i, j)
|
|
|
|
log.Printf("etcd-tester: [round#%d case#%d] checking current revisions...", i, j)
|
|
var (
|
|
revs map[string]int64
|
|
rerr error
|
|
ok bool
|
|
)
|
|
for k := 0; k < 5; k++ {
|
|
time.Sleep(time.Second)
|
|
|
|
revs, rerr = tt.cluster.getRevision()
|
|
if rerr != nil {
|
|
log.Printf("etcd-tester: [round#%d case#%d.%d] failed to get current revisions (%v)", i, j, k, rerr)
|
|
continue
|
|
}
|
|
if currentRevision, ok = getSameValue(revs); ok {
|
|
break
|
|
}
|
|
|
|
log.Printf("etcd-tester: [round#%d case#%d.%d] inconsistent current revisions %+v", i, j, k, revs)
|
|
}
|
|
if !ok || rerr != nil {
|
|
log.Printf("etcd-tester: [round#%d case#%d] checking current revisions failed (%v)", i, j, revs)
|
|
if err := tt.cleanup(i, j); err != nil {
|
|
log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
|
|
return
|
|
}
|
|
continue
|
|
}
|
|
log.Printf("etcd-tester: [round#%d case#%d] all members are consistent with current revisions", i, j)
|
|
|
|
log.Printf("etcd-tester: [round#%d case#%d] checking current storage hashes...", i, j)
|
|
hashes, err := tt.cluster.getKVHash()
|
|
if err != nil {
|
|
log.Printf("etcd-tester: [round#%d case#%d] getKVHash error (%v)", i, j, err)
|
|
if err := tt.cleanup(i, j); err != nil {
|
|
log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
|
|
return
|
|
}
|
|
continue
|
|
}
|
|
if _, ok = getSameValue(hashes); !ok {
|
|
log.Printf("etcd-tester: [round#%d case#%d] checking current storage hashes failed (%v)", i, j, hashes)
|
|
if err := tt.cleanup(i, j); err != nil {
|
|
log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
|
|
return
|
|
}
|
|
continue
|
|
}
|
|
log.Printf("etcd-tester: [round#%d case#%d] all members are consistent with storage hashes", i, j)
|
|
|
|
log.Printf("etcd-tester: [round#%d case#%d] restarting the stressers...", i, j)
|
|
for _, s := range tt.cluster.Stressers {
|
|
go s.Stress()
|
|
}
|
|
|
|
log.Printf("etcd-tester: [round#%d case#%d] succeed!", i, j)
|
|
}
|
|
|
|
revToCompact := max(0, currentRevision-10000)
|
|
log.Printf("etcd-tester: [round#%d] compacting storage at %d (current revision %d)", i, revToCompact, currentRevision)
|
|
if err := tt.cluster.compactKV(revToCompact); err != nil {
|
|
log.Printf("etcd-tester: [round#%d] compactKV error (%v)", i, err)
|
|
if err := tt.cleanup(i, 0); err != nil {
|
|
log.Printf("etcd-tester: [round#%d] cleanup error: %v", i, err)
|
|
return
|
|
}
|
|
continue
|
|
}
|
|
log.Printf("etcd-tester: [round#%d] compacted storage", i)
|
|
|
|
// TODO: make sure compaction is finished
|
|
time.Sleep(30 * time.Second)
|
|
}
|
|
}
|
|
|
|
func (tt *tester) cleanup(i, j int) error {
|
|
log.Printf("etcd-tester: [round#%d case#%d] cleaning up...", i, j)
|
|
if err := tt.cluster.Cleanup(); err != nil {
|
|
return err
|
|
}
|
|
return tt.cluster.Bootstrap()
|
|
}
|
|
|
|
type Status struct {
|
|
Since time.Time
|
|
Failures []string
|
|
RoundLimit int
|
|
|
|
Cluster ClusterStatus
|
|
cluster *cluster
|
|
|
|
mu sync.Mutex // guards Round and Case
|
|
Round int
|
|
Case int
|
|
}
|
|
|
|
// get gets a copy of status
|
|
func (s *Status) get() Status {
|
|
s.mu.Lock()
|
|
got := *s
|
|
cluster := s.cluster
|
|
s.mu.Unlock()
|
|
got.Cluster = cluster.Status()
|
|
return got
|
|
}
|
|
|
|
func (s *Status) setRound(r int) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
s.Round = r
|
|
}
|
|
|
|
func (s *Status) setCase(c int) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
s.Case = c
|
|
}
|
|
|
|
func (c *cluster) getRevision() (map[string]int64, error) {
|
|
revs := make(map[string]int64)
|
|
for _, u := range c.GRPCURLs {
|
|
conn, err := grpc.Dial(u, grpc.WithInsecure(), grpc.WithTimeout(5*time.Second))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
kvc := pb.NewKVClient(conn)
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
resp, err := kvc.Range(ctx, &pb.RangeRequest{Key: []byte("foo")})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
cancel()
|
|
revs[u] = resp.Header.Revision
|
|
}
|
|
return revs, nil
|
|
}
|
|
|
|
func (c *cluster) getKVHash() (map[string]int64, error) {
|
|
hashes := make(map[string]int64)
|
|
for _, u := range c.GRPCURLs {
|
|
conn, err := grpc.Dial(u, grpc.WithInsecure(), grpc.WithTimeout(5*time.Second))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
kvc := pb.NewKVClient(conn)
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
resp, err := kvc.Hash(ctx, &pb.HashRequest{})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
cancel()
|
|
hashes[u] = int64(resp.Hash)
|
|
}
|
|
return hashes, nil
|
|
}
|
|
|
|
func (c *cluster) compactKV(rev int64) error {
|
|
var (
|
|
conn *grpc.ClientConn
|
|
err error
|
|
)
|
|
for _, u := range c.GRPCURLs {
|
|
conn, err = grpc.Dial(u, grpc.WithInsecure(), grpc.WithTimeout(5*time.Second))
|
|
if err != nil {
|
|
continue
|
|
}
|
|
kvc := pb.NewKVClient(conn)
|
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
|
_, err = kvc.Compact(ctx, &pb.CompactionRequest{Revision: rev})
|
|
cancel()
|
|
if err == nil {
|
|
return nil
|
|
}
|
|
}
|
|
return err
|
|
}
|