tools: remove

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>
Gyuho Lee 2018-04-10 23:43:16 -07:00
parent dd9f05567d
commit 074e417770
73 changed files with 0 additions and 8113 deletions

View File

@ -1 +0,0 @@
benchmark

View File

@ -1,16 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package cmd implements individual benchmark commands for the benchmark utility.
package cmd

View File

@ -1,85 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"context"
"fmt"
"time"
v3 "github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/pkg/report"
"github.com/spf13/cobra"
"gopkg.in/cheggaaa/pb.v1"
)
var leaseKeepaliveCmd = &cobra.Command{
Use: "lease-keepalive",
Short: "Benchmark lease keepalive",
Run: leaseKeepaliveFunc,
}
var (
leaseKeepaliveTotal int
)
func init() {
RootCmd.AddCommand(leaseKeepaliveCmd)
leaseKeepaliveCmd.Flags().IntVar(&leaseKeepaliveTotal, "total", 10000, "Total number of lease keepalive requests")
}
func leaseKeepaliveFunc(cmd *cobra.Command, args []string) {
requests := make(chan struct{})
clients := mustCreateClients(totalClients, totalConns)
bar = pb.New(leaseKeepaliveTotal)
bar.Format("Bom !")
bar.Start()
r := newReport()
for i := range clients {
wg.Add(1)
go func(c v3.Lease) {
defer wg.Done()
resp, err := c.Grant(context.Background(), 100)
if err != nil {
panic(err)
}
for range requests {
st := time.Now()
_, err := c.KeepAliveOnce(context.TODO(), resp.ID)
r.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
bar.Increment()
}
}(clients[i])
}
wg.Add(1)
go func() {
defer wg.Done()
for i := 0; i < leaseKeepaliveTotal; i++ {
requests <- struct{}{}
}
close(requests)
}()
rc := r.Run()
wg.Wait()
close(r.Results())
bar.Finish()
fmt.Printf("%s", <-rc)
}
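For reference, a plausible invocation of this benchmark (endpoint and counts are illustrative, not taken from the removed source) would be:
```
$ benchmark --endpoints=127.0.0.1:2379 --conns=10 --clients=50 lease-keepalive --total=20000
```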

View File

@ -1,135 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"crypto/rand"
"fmt"
"os"
"runtime/pprof"
"time"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/pkg/report"
"github.com/spf13/cobra"
)
// mvccPutCmd represents a storage put performance benchmarking tool
var mvccPutCmd = &cobra.Command{
Use: "put",
Short: "Benchmark put performance of storage",
Run: mvccPutFunc,
}
var (
mvccTotalRequests int
storageKeySize int
valueSize int
txn bool
nrTxnOps int
)
func init() {
mvccCmd.AddCommand(mvccPutCmd)
mvccPutCmd.Flags().IntVar(&mvccTotalRequests, "total", 100, "Total number of keys to put")
mvccPutCmd.Flags().IntVar(&storageKeySize, "key-size", 64, "Size of each key in bytes")
mvccPutCmd.Flags().IntVar(&valueSize, "value-size", 64, "Size of each value in bytes")
mvccPutCmd.Flags().BoolVar(&txn, "txn", false, "Whether to put keys in transactions")
mvccPutCmd.Flags().IntVar(&nrTxnOps, "txn-ops", 1, "Number of keys to put per transaction")
// TODO: after the PR https://github.com/spf13/cobra/pull/220 is merged, the below pprof related flags should be moved to RootCmd
mvccPutCmd.Flags().StringVar(&cpuProfPath, "cpuprofile", "", "the path of file for storing cpu profile result")
mvccPutCmd.Flags().StringVar(&memProfPath, "memprofile", "", "the path of file for storing heap profile result")
}
func createBytesSlice(bytesN, sliceN int) [][]byte {
rs := make([][]byte, sliceN)
for i := range rs {
rs[i] = make([]byte, bytesN)
if _, err := rand.Read(rs[i]); err != nil {
panic(err)
}
}
return rs
}
func mvccPutFunc(cmd *cobra.Command, args []string) {
if cpuProfPath != "" {
f, err := os.Create(cpuProfPath)
if err != nil {
fmt.Fprintln(os.Stderr, "Failed to create a file for storing cpu profile result: ", err)
os.Exit(1)
}
err = pprof.StartCPUProfile(f)
if err != nil {
fmt.Fprintln(os.Stderr, "Failed to start cpu profile: ", err)
os.Exit(1)
}
defer pprof.StopCPUProfile()
}
if memProfPath != "" {
f, err := os.Create(memProfPath)
if err != nil {
fmt.Fprintln(os.Stderr, "Failed to create a file for storing heap profile result: ", err)
os.Exit(1)
}
defer func() {
err := pprof.WriteHeapProfile(f)
if err != nil {
fmt.Fprintln(os.Stderr, "Failed to write heap profile result: ", err)
// can do nothing for handling the error
}
}()
}
keys := createBytesSlice(storageKeySize, mvccTotalRequests*nrTxnOps)
vals := createBytesSlice(valueSize, mvccTotalRequests*nrTxnOps)
weight := float64(nrTxnOps)
r := newWeightedReport()
rrc := r.Results()
rc := r.Run()
if txn {
for i := 0; i < mvccTotalRequests; i++ {
st := time.Now()
tw := s.Write()
for j := i * nrTxnOps; j < (i+1)*nrTxnOps; j++ { // give each transaction a disjoint key range
tw.Put(keys[j], vals[j], lease.NoLease)
}
tw.End()
rrc <- report.Result{Start: st, End: time.Now(), Weight: weight}
}
} else {
for i := 0; i < mvccTotalRequests; i++ {
st := time.Now()
s.Put(keys[i], vals[i], lease.NoLease)
rrc <- report.Result{Start: st, End: time.Now()}
}
}
close(r.Results())
fmt.Printf("%s", <-rc)
}
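This subcommand drives a local MVCC backend directly rather than a live cluster, so no endpoints are needed; a plausible run (counts illustrative) is:
```
$ benchmark mvcc put --total=1000 --key-size=64 --value-size=256 --txn --txn-ops=10
```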

View File

@ -1,61 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"os"
"time"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/mvcc/backend"
"github.com/spf13/cobra"
)
var (
batchInterval int
batchLimit int
s mvcc.KV
)
func initMVCC() {
bcfg := backend.DefaultBackendConfig()
bcfg.Path, bcfg.BatchInterval, bcfg.BatchLimit = "mvcc-bench", time.Duration(batchInterval)*time.Millisecond, batchLimit
be := backend.New(bcfg)
s = mvcc.NewStore(be, &lease.FakeLessor{}, nil)
os.Remove("mvcc-bench") // boltDB has an opened fd, so removing the file is ok
}
// mvccCmd represents the MVCC storage benchmarking tools
var mvccCmd = &cobra.Command{
Use: "mvcc",
Short: "Benchmark mvcc",
Long: `mvcc subcommand is a set of benchmark tools for the MVCC storage subsystem of etcd.
Actual benchmarks are implemented as its subcommands.`,
PersistentPreRun: mvccPreRun,
}
func init() {
RootCmd.AddCommand(mvccCmd)
mvccCmd.PersistentFlags().IntVar(&batchInterval, "batch-interval", 100, "Interval of batching (milliseconds)")
mvccCmd.PersistentFlags().IntVar(&batchLimit, "batch-limit", 10000, "Maximum number of operations per batched transaction")
}
func mvccPreRun(cmd *cobra.Command, args []string) {
initMVCC()
}

View File

@ -1,152 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"context"
"encoding/binary"
"fmt"
"math"
"math/rand"
"os"
"time"
v3 "github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/pkg/report"
"github.com/spf13/cobra"
"golang.org/x/time/rate"
"gopkg.in/cheggaaa/pb.v1"
)
// putCmd represents the put command
var putCmd = &cobra.Command{
Use: "put",
Short: "Benchmark put",
Run: putFunc,
}
var (
keySize int
valSize int
putTotal int
putRate int
keySpaceSize int
seqKeys bool
compactInterval time.Duration
compactIndexDelta int64
)
func init() {
RootCmd.AddCommand(putCmd)
putCmd.Flags().IntVar(&keySize, "key-size", 8, "Key size of put request")
putCmd.Flags().IntVar(&valSize, "val-size", 8, "Value size of put request")
putCmd.Flags().IntVar(&putRate, "rate", 0, "Maximum puts per second (0 is no limit)")
putCmd.Flags().IntVar(&putTotal, "total", 10000, "Total number of put requests")
putCmd.Flags().IntVar(&keySpaceSize, "key-space-size", 1, "Maximum possible keys")
putCmd.Flags().BoolVar(&seqKeys, "sequential-keys", false, "Use sequential keys")
putCmd.Flags().DurationVar(&compactInterval, "compact-interval", 0, `Interval to compact database (do not duplicate this with etcd's 'auto-compaction-retention' flag) (e.g. --compact-interval=5m compacts every 5 minutes)`)
putCmd.Flags().Int64Var(&compactIndexDelta, "compact-index-delta", 1000, "Delta between current revision and compact revision (e.g. current revision 10000, compact at 9000)")
}
func putFunc(cmd *cobra.Command, args []string) {
if keySpaceSize <= 0 {
fmt.Fprintf(os.Stderr, "expected positive --key-space-size, got (%v)", keySpaceSize)
os.Exit(1)
}
requests := make(chan v3.Op, totalClients)
if putRate == 0 {
putRate = math.MaxInt32
}
limit := rate.NewLimiter(rate.Limit(putRate), 1)
clients := mustCreateClients(totalClients, totalConns)
k, v := make([]byte, keySize), string(mustRandBytes(valSize))
bar = pb.New(putTotal)
bar.Format("Bom !")
bar.Start()
r := newReport()
for i := range clients {
wg.Add(1)
go func(c *v3.Client) {
defer wg.Done()
for op := range requests {
limit.Wait(context.Background())
st := time.Now()
_, err := c.Do(context.Background(), op)
r.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
bar.Increment()
}
}(clients[i])
}
go func() {
for i := 0; i < putTotal; i++ {
if seqKeys {
binary.PutVarint(k, int64(i%keySpaceSize))
} else {
binary.PutVarint(k, int64(rand.Intn(keySpaceSize)))
}
requests <- v3.OpPut(string(k), v)
}
close(requests)
}()
if compactInterval > 0 {
go func() {
for {
time.Sleep(compactInterval)
compactKV(clients)
}
}()
}
rc := r.Run()
wg.Wait()
close(r.Results())
bar.Finish()
fmt.Println(<-rc)
}
func compactKV(clients []*v3.Client) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
resp, err := clients[0].KV.Get(ctx, "foo")
cancel()
if err != nil {
panic(err)
}
revToCompact := max(0, resp.Header.Revision-compactIndexDelta)
ctx, cancel = context.WithTimeout(context.Background(), 5*time.Second)
_, err = clients[0].KV.Compact(ctx, revToCompact)
cancel()
if err != nil {
panic(err)
}
}
func max(n1, n2 int64) int64 {
if n1 > n2 {
return n1
}
return n2
}
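A plausible invocation exercising these flags (all values illustrative) would be:
```
$ benchmark --conns=10 --clients=100 put --total=100000 --val-size=256 --key-space-size=10000 --sequential-keys --compact-interval=5m
```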

View File

@ -1,119 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"context"
"fmt"
"math"
"os"
"time"
v3 "github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/pkg/report"
"github.com/spf13/cobra"
"golang.org/x/time/rate"
"gopkg.in/cheggaaa/pb.v1"
)
// rangeCmd represents the range command
var rangeCmd = &cobra.Command{
Use: "range key [end-range]",
Short: "Benchmark range",
Run: rangeFunc,
}
var (
rangeRate int
rangeTotal int
rangeConsistency string
)
func init() {
RootCmd.AddCommand(rangeCmd)
rangeCmd.Flags().IntVar(&rangeRate, "rate", 0, "Maximum range requests per second (0 is no limit)")
rangeCmd.Flags().IntVar(&rangeTotal, "total", 10000, "Total number of range requests")
rangeCmd.Flags().StringVar(&rangeConsistency, "consistency", "l", "Linearizable(l) or Serializable(s)")
}
func rangeFunc(cmd *cobra.Command, args []string) {
if len(args) == 0 || len(args) > 2 {
fmt.Fprintln(os.Stderr, cmd.Usage())
os.Exit(1)
}
k := args[0]
end := ""
if len(args) == 2 {
end = args[1]
}
if rangeConsistency == "l" {
fmt.Println("bench with linearizable range")
} else if rangeConsistency == "s" {
fmt.Println("bench with serializable range")
} else {
fmt.Fprintln(os.Stderr, cmd.Usage())
os.Exit(1)
}
if rangeRate == 0 {
rangeRate = math.MaxInt32
}
limit := rate.NewLimiter(rate.Limit(rangeRate), 1)
requests := make(chan v3.Op, totalClients)
clients := mustCreateClients(totalClients, totalConns)
bar = pb.New(rangeTotal)
bar.Format("Bom !")
bar.Start()
r := newReport()
for i := range clients {
wg.Add(1)
go func(c *v3.Client) {
defer wg.Done()
for op := range requests {
limit.Wait(context.Background())
st := time.Now()
_, err := c.Do(context.Background(), op)
r.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
bar.Increment()
}
}(clients[i])
}
go func() {
for i := 0; i < rangeTotal; i++ {
opts := []v3.OpOption{v3.WithRange(end)}
if rangeConsistency == "s" {
opts = append(opts, v3.WithSerializable())
}
op := v3.OpGet(k, opts...)
requests <- op
}
close(requests)
}()
rc := r.Run()
wg.Wait()
close(r.Results())
bar.Finish()
fmt.Printf("%s", <-rc)
}
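A plausible serializable range run over a key interval (key names and counts illustrative) would be:
```
$ benchmark --conns=10 --clients=100 range foo foz --consistency=s --total=50000 --rate=5000
```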

View File

@ -1,74 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"sync"
"time"
"github.com/coreos/etcd/pkg/transport"
"github.com/spf13/cobra"
"gopkg.in/cheggaaa/pb.v1"
)
// RootCmd represents the base command when called without any subcommands
var RootCmd = &cobra.Command{
Use: "benchmark",
Short: "A low-level benchmark tool for etcd3",
Long: `benchmark is a low-level benchmark tool for etcd3.
It uses gRPC client directly and does not depend on
etcd client library.
`,
}
var (
endpoints []string
totalConns uint
totalClients uint
precise bool
sample bool
bar *pb.ProgressBar
wg sync.WaitGroup
tls transport.TLSInfo
cpuProfPath string
memProfPath string
user string
dialTimeout time.Duration
targetLeader bool
)
func init() {
RootCmd.PersistentFlags().StringSliceVar(&endpoints, "endpoints", []string{"127.0.0.1:2379"}, "gRPC endpoints")
RootCmd.PersistentFlags().UintVar(&totalConns, "conns", 1, "Total number of gRPC connections")
RootCmd.PersistentFlags().UintVar(&totalClients, "clients", 1, "Total number of gRPC clients")
RootCmd.PersistentFlags().BoolVar(&precise, "precise", false, "use full floating point precision")
RootCmd.PersistentFlags().BoolVar(&sample, "sample", false, "'true' to sample requests for every second")
RootCmd.PersistentFlags().StringVar(&tls.CertFile, "cert", "", "identify HTTPS client using this SSL certificate file")
RootCmd.PersistentFlags().StringVar(&tls.KeyFile, "key", "", "identify HTTPS client using this SSL key file")
RootCmd.PersistentFlags().StringVar(&tls.CAFile, "cacert", "", "verify certificates of HTTPS-enabled servers using this CA bundle")
RootCmd.PersistentFlags().StringVar(&user, "user", "", "provide username[:password] and prompt if password is not supplied.")
RootCmd.PersistentFlags().DurationVar(&dialTimeout, "dial-timeout", 0, "dial timeout for client connections")
RootCmd.PersistentFlags().BoolVar(&targetLeader, "target-leader", false, "connect only to the leader node")
}
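These persistent flags compose with every subcommand; a hypothetical TLS- and auth-enabled run (certificate paths and credentials are placeholders) might look like:
```
$ benchmark --endpoints=https://10.0.0.1:2379 \
  --cacert=ca.pem --cert=client.pem --key=client-key.pem \
  --user=root:secret --target-leader \
  range foo --total=10000
```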

View File

@ -1,208 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"context"
"encoding/binary"
"fmt"
"math"
"math/rand"
"os"
"time"
v3 "github.com/coreos/etcd/clientv3"
v3sync "github.com/coreos/etcd/clientv3/concurrency"
"github.com/coreos/etcd/etcdserver/api/v3lock/v3lockpb"
"github.com/coreos/etcd/pkg/report"
"github.com/spf13/cobra"
"golang.org/x/time/rate"
"gopkg.in/cheggaaa/pb.v1"
)
// stmCmd represents the STM benchmark command
var stmCmd = &cobra.Command{
Use: "stm",
Short: "Benchmark STM",
Run: stmFunc,
}
type stmApply func(v3sync.STM) error
var (
stmIsolation string
stmIso v3sync.Isolation
stmTotal int
stmKeysPerTxn int
stmKeyCount int
stmValSize int
stmWritePercent int
stmLocker string
stmRate int
)
func init() {
RootCmd.AddCommand(stmCmd)
stmCmd.Flags().StringVar(&stmIsolation, "isolation", "r", "Read Committed (c), Repeatable Reads (r), Serializable (s), or Snapshot (ss)")
stmCmd.Flags().IntVar(&stmKeyCount, "keys", 1, "Total unique keys accessible by the benchmark")
stmCmd.Flags().IntVar(&stmTotal, "total", 10000, "Total number of completed STM transactions")
stmCmd.Flags().IntVar(&stmKeysPerTxn, "keys-per-txn", 1, "Number of keys to access per transaction")
stmCmd.Flags().IntVar(&stmWritePercent, "txn-wr-percent", 50, "Percentage of keys to overwrite per transaction")
stmCmd.Flags().StringVar(&stmLocker, "stm-locker", "stm", "Wrap STM transaction with a custom locking mechanism (stm, lock-client, lock-rpc)")
stmCmd.Flags().IntVar(&stmValSize, "val-size", 8, "Value size of each STM put request")
stmCmd.Flags().IntVar(&stmRate, "rate", 0, "Maximum STM transactions per second (0 is no limit)")
}
func stmFunc(cmd *cobra.Command, args []string) {
if stmKeyCount <= 0 {
fmt.Fprintf(os.Stderr, "expected positive --keys, got (%v)", stmKeyCount)
os.Exit(1)
}
if stmWritePercent < 0 || stmWritePercent > 100 {
fmt.Fprintf(os.Stderr, "expected [0, 100] --txn-wr-percent, got (%v)", stmWritePercent)
os.Exit(1)
}
if stmKeysPerTxn < 0 || stmKeysPerTxn > stmKeyCount {
fmt.Fprintf(os.Stderr, "expected --keys-per-txn between 0 and %v, got (%v)", stmKeyCount, stmKeysPerTxn)
os.Exit(1)
}
switch stmIsolation {
case "c":
stmIso = v3sync.ReadCommitted
case "r":
stmIso = v3sync.RepeatableReads
case "s":
stmIso = v3sync.Serializable
case "ss":
stmIso = v3sync.SerializableSnapshot
default:
fmt.Fprintln(os.Stderr, cmd.Usage())
os.Exit(1)
}
if stmRate == 0 {
stmRate = math.MaxInt32
}
limit := rate.NewLimiter(rate.Limit(stmRate), 1)
requests := make(chan stmApply, totalClients)
clients := mustCreateClients(totalClients, totalConns)
bar = pb.New(stmTotal)
bar.Format("Bom !")
bar.Start()
r := newReport()
for i := range clients {
wg.Add(1)
go doSTM(clients[i], requests, r.Results())
}
go func() {
for i := 0; i < stmTotal; i++ {
kset := make(map[string]struct{})
for len(kset) != stmKeysPerTxn {
k := make([]byte, 16)
binary.PutVarint(k, int64(rand.Intn(stmKeyCount)))
s := string(k)
kset[s] = struct{}{}
}
applyf := func(s v3sync.STM) error {
limit.Wait(context.Background())
wrs := int(float32(len(kset)*stmWritePercent) / 100.0)
for k := range kset {
s.Get(k)
if wrs > 0 {
s.Put(k, string(mustRandBytes(stmValSize)))
wrs--
}
}
return nil
}
requests <- applyf
}
close(requests)
}()
rc := r.Run()
wg.Wait()
close(r.Results())
bar.Finish()
fmt.Printf("%s", <-rc)
}
func doSTM(client *v3.Client, requests <-chan stmApply, results chan<- report.Result) {
defer wg.Done()
lock, unlock := func() error { return nil }, func() error { return nil }
switch stmLocker {
case "lock-client":
s, err := v3sync.NewSession(client)
if err != nil {
panic(err)
}
defer s.Close()
m := v3sync.NewMutex(s, "stmlock")
lock = func() error { return m.Lock(context.TODO()) }
unlock = func() error { return m.Unlock(context.TODO()) }
case "lock-rpc":
var lockKey []byte
s, err := v3sync.NewSession(client)
if err != nil {
panic(err)
}
defer s.Close()
lc := v3lockpb.NewLockClient(client.ActiveConnection())
lock = func() error {
req := &v3lockpb.LockRequest{Name: []byte("stmlock"), Lease: int64(s.Lease())}
resp, err := lc.Lock(context.TODO(), req)
if resp != nil {
lockKey = resp.Key
}
return err
}
unlock = func() error {
req := &v3lockpb.UnlockRequest{Key: lockKey}
_, err := lc.Unlock(context.TODO(), req)
return err
}
case "stm":
default:
fmt.Fprintf(os.Stderr, "unexpected stm locker %q\n", stmLocker)
os.Exit(1)
}
for applyf := range requests {
st := time.Now()
if lerr := lock(); lerr != nil {
panic(lerr)
}
_, err := v3sync.NewSTM(client, applyf, v3sync.WithIsolation(stmIso))
if lerr := unlock(); lerr != nil {
panic(lerr)
}
results <- report.Result{Err: err, Start: st, End: time.Now()}
bar.Increment()
}
}
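A plausible invocation comparing isolation levels and lockers (values illustrative) would be:
```
$ benchmark stm --isolation=ss --keys=1000 --keys-per-txn=4 --txn-wr-percent=75 --stm-locker=lock-rpc --total=20000
```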

View File

@ -1,108 +0,0 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"context"
"encoding/binary"
"fmt"
"math"
"os"
"time"
v3 "github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/pkg/report"
"github.com/spf13/cobra"
"golang.org/x/time/rate"
"gopkg.in/cheggaaa/pb.v1"
)
// txnPutCmd represents the txnPut command
var txnPutCmd = &cobra.Command{
Use: "txn-put",
Short: "Benchmark txn-put",
Run: txnPutFunc,
}
var (
txnPutTotal int
txnPutRate int
txnPutOpsPerTxn int
)
func init() {
RootCmd.AddCommand(txnPutCmd)
txnPutCmd.Flags().IntVar(&keySize, "key-size", 8, "Key size of txn put")
txnPutCmd.Flags().IntVar(&valSize, "val-size", 8, "Value size of txn put")
txnPutCmd.Flags().IntVar(&txnPutOpsPerTxn, "txn-ops", 1, "Number of puts per txn")
txnPutCmd.Flags().IntVar(&txnPutRate, "rate", 0, "Maximum txns per second (0 is no limit)")
txnPutCmd.Flags().IntVar(&txnPutTotal, "total", 10000, "Total number of txn requests")
txnPutCmd.Flags().IntVar(&keySpaceSize, "key-space-size", 1, "Maximum possible keys")
}
func txnPutFunc(cmd *cobra.Command, args []string) {
if keySpaceSize <= 0 {
fmt.Fprintf(os.Stderr, "expected positive --key-space-size, got (%v)", keySpaceSize)
os.Exit(1)
}
requests := make(chan []v3.Op, totalClients)
if txnPutRate == 0 {
txnPutRate = math.MaxInt32
}
limit := rate.NewLimiter(rate.Limit(txnPutRate), 1)
clients := mustCreateClients(totalClients, totalConns)
k, v := make([]byte, keySize), string(mustRandBytes(valSize))
bar = pb.New(txnPutTotal)
bar.Format("Bom !")
bar.Start()
r := newReport()
for i := range clients {
wg.Add(1)
go func(c *v3.Client) {
defer wg.Done()
for ops := range requests {
limit.Wait(context.Background())
st := time.Now()
_, err := c.Txn(context.TODO()).Then(ops...).Commit()
r.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
bar.Increment()
}
}(clients[i])
}
go func() {
for i := 0; i < txnPutTotal; i++ {
ops := make([]v3.Op, txnPutOpsPerTxn)
for j := 0; j < txnPutOpsPerTxn; j++ {
binary.PutVarint(k, int64(((i*txnPutOpsPerTxn)+j)%keySpaceSize))
ops[j] = v3.OpPut(string(k), v)
}
requests <- ops
}
close(requests)
}()
rc := r.Run()
wg.Wait()
close(r.Results())
bar.Finish()
fmt.Println(<-rc)
}
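A plausible invocation batching several puts per transaction (values illustrative) would be:
```
$ benchmark txn-put --txn-ops=8 --val-size=256 --key-space-size=100000 --total=10000
```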

View File

@ -1,179 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"context"
"crypto/rand"
"fmt"
"os"
"strings"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/pkg/report"
"google.golang.org/grpc/grpclog"
"github.com/bgentry/speakeasy"
)
var (
// dialTotal counts the number of mustCreateConn calls so that endpoint
// connections can be handed out in round-robin order
dialTotal int
// leaderEps is a cache for holding endpoints of a leader node
leaderEps []string
// cache the username and password for multiple connections
globalUserName string
globalPassword string
)
func mustFindLeaderEndpoints(c *clientv3.Client) {
resp, lerr := c.MemberList(context.TODO())
if lerr != nil {
fmt.Fprintf(os.Stderr, "failed to get a member list: %s\n", lerr)
os.Exit(1)
}
leaderID := uint64(0)
for _, ep := range c.Endpoints() {
if sresp, serr := c.Status(context.TODO(), ep); serr == nil {
leaderID = sresp.Leader
break
}
}
for _, m := range resp.Members {
if m.ID == leaderID {
leaderEps = m.ClientURLs
return
}
}
fmt.Fprintf(os.Stderr, "failed to find a leader endpoint\n")
os.Exit(1)
}
func getUsernamePassword(usernameFlag string) (string, string, error) {
if globalUserName != "" && globalPassword != "" {
return globalUserName, globalPassword, nil
}
colon := strings.Index(usernameFlag, ":")
if colon == -1 {
// Prompt for the password.
password, err := speakeasy.Ask("Password: ")
if err != nil {
return "", "", err
}
globalUserName = usernameFlag
globalPassword = password
} else {
globalUserName = usernameFlag[:colon]
globalPassword = usernameFlag[colon+1:]
}
return globalUserName, globalPassword, nil
}
func mustCreateConn() *clientv3.Client {
connEndpoints := leaderEps
if len(connEndpoints) == 0 {
connEndpoints = []string{endpoints[dialTotal%len(endpoints)]}
dialTotal++
}
cfg := clientv3.Config{
Endpoints: connEndpoints,
DialTimeout: dialTimeout,
}
if !tls.Empty() {
cfgtls, err := tls.ClientConfig()
if err != nil {
fmt.Fprintf(os.Stderr, "bad tls config: %v\n", err)
os.Exit(1)
}
cfg.TLS = cfgtls
}
if len(user) != 0 {
username, password, err := getUsernamePassword(user)
if err != nil {
fmt.Fprintf(os.Stderr, "bad user information: %s %v\n", user, err)
os.Exit(1)
}
cfg.Username = username
cfg.Password = password
}
client, err := clientv3.New(cfg)
if err != nil {
fmt.Fprintf(os.Stderr, "dial error: %v\n", err)
os.Exit(1)
}
clientv3.SetLogger(grpclog.NewLoggerV2(os.Stderr, os.Stderr, os.Stderr))
if targetLeader && len(leaderEps) == 0 {
// discover the leader's endpoints, then redial against them only
mustFindLeaderEndpoints(client)
client.Close()
return mustCreateConn()
}
return client
}
func mustCreateClients(totalClients, totalConns uint) []*clientv3.Client {
conns := make([]*clientv3.Client, totalConns)
for i := range conns {
conns[i] = mustCreateConn()
}
clients := make([]*clientv3.Client, totalClients)
for i := range clients {
clients[i] = conns[i%int(totalConns)]
}
return clients
}
func mustRandBytes(n int) []byte {
rb := make([]byte, n)
_, err := rand.Read(rb)
if err != nil {
fmt.Fprintf(os.Stderr, "failed to generate value: %v\n", err)
os.Exit(1)
}
return rb
}
func newReport() report.Report {
p := "%4.4f"
if precise {
p = "%g"
}
if sample {
return report.NewReportSample(p)
}
return report.NewReport(p)
}
func newWeightedReport() report.Report {
p := "%4.4f"
if precise {
p = "%g"
}
if sample {
return report.NewReportSample(p)
}
return report.NewWeightedReport(report.NewReport(p), p)
}

View File

@ -1,247 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"context"
"encoding/binary"
"fmt"
"math/rand"
"os"
"sync/atomic"
"time"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/pkg/report"
"github.com/spf13/cobra"
"golang.org/x/time/rate"
"gopkg.in/cheggaaa/pb.v1"
)
// watchCmd represents the watch command
var watchCmd = &cobra.Command{
Use: "watch",
Short: "Benchmark watch",
Long: `Benchmark watch tests the performance of processing watch requests and
sending events to watchers. It tests the sending performance by
changing the value of the watched keys with concurrent put
requests.
During the test, each watcher watches (--total/--watchers) keys
(a watcher might watch on the same key multiple times if
--watched-key-total is small).
Each key is watched by (--total/--watched-key-total) watchers.
`,
Run: watchFunc,
}
var (
watchStreams int
watchWatchesPerStream int
watchedKeyTotal int
watchPutRate int
watchPutTotal int
watchKeySize int
watchKeySpaceSize int
watchSeqKeys bool
)
type watchedKeys struct {
watched []string
numWatchers map[string]int
watches []clientv3.WatchChan
// ctx to control all watches
ctx context.Context
cancel context.CancelFunc
}
func init() {
RootCmd.AddCommand(watchCmd)
watchCmd.Flags().IntVar(&watchStreams, "streams", 10, "Total watch streams")
watchCmd.Flags().IntVar(&watchWatchesPerStream, "watch-per-stream", 100, "Total watchers per stream")
watchCmd.Flags().IntVar(&watchedKeyTotal, "watched-key-total", 1, "Total number of keys to be watched")
watchCmd.Flags().IntVar(&watchPutRate, "put-rate", 0, "Number of keys to put per second")
watchCmd.Flags().IntVar(&watchPutTotal, "put-total", 1000, "Number of put requests")
watchCmd.Flags().IntVar(&watchKeySize, "key-size", 32, "Key size of watch request")
watchCmd.Flags().IntVar(&watchKeySpaceSize, "key-space-size", 1, "Maximum possible keys")
watchCmd.Flags().BoolVar(&watchSeqKeys, "sequential-keys", false, "Use sequential keys")
}
func watchFunc(cmd *cobra.Command, args []string) {
if watchKeySpaceSize <= 0 {
fmt.Fprintf(os.Stderr, "expected positive --key-space-size, got (%v)", watchKeySpaceSize)
os.Exit(1)
}
grpcConns := int(totalClients)
if totalClients > totalConns {
grpcConns = int(totalConns)
}
wantedConns := 1 + (watchStreams / 100)
if grpcConns < wantedConns {
fmt.Fprintf(os.Stderr, "warning: grpc limits 100 streams per client connection, have %d but need %d\n", grpcConns, wantedConns)
}
clients := mustCreateClients(totalClients, totalConns)
wk := newWatchedKeys()
benchMakeWatches(clients, wk)
benchPutWatches(clients, wk)
}
func benchMakeWatches(clients []*clientv3.Client, wk *watchedKeys) {
streams := make([]clientv3.Watcher, watchStreams)
for i := range streams {
streams[i] = clientv3.NewWatcher(clients[i%len(clients)])
}
keyc := make(chan string, watchStreams)
bar = pb.New(watchStreams * watchWatchesPerStream)
bar.Format("Bom !")
bar.Start()
r := newReport()
rch := r.Results()
wg.Add(len(streams) + 1)
wc := make(chan []clientv3.WatchChan, len(streams))
for _, s := range streams {
go func(s clientv3.Watcher) {
defer wg.Done()
var ws []clientv3.WatchChan
for i := 0; i < watchWatchesPerStream; i++ {
k := <-keyc
st := time.Now()
wch := s.Watch(wk.ctx, k)
rch <- report.Result{Start: st, End: time.Now()}
ws = append(ws, wch)
bar.Increment()
}
wc <- ws
}(s)
}
go func() {
defer func() {
close(keyc)
wg.Done()
}()
for i := 0; i < watchStreams*watchWatchesPerStream; i++ {
key := wk.watched[i%len(wk.watched)]
keyc <- key
wk.numWatchers[key]++
}
}()
rc := r.Run()
wg.Wait()
bar.Finish()
close(r.Results())
fmt.Printf("Watch creation summary:\n%s", <-rc)
for i := 0; i < len(streams); i++ {
wk.watches = append(wk.watches, (<-wc)...)
}
}
func newWatchedKeys() *watchedKeys {
watched := make([]string, watchedKeyTotal)
for i := range watched {
k := make([]byte, watchKeySize)
if watchSeqKeys {
binary.PutVarint(k, int64(i%watchKeySpaceSize))
} else {
binary.PutVarint(k, int64(rand.Intn(watchKeySpaceSize)))
}
watched[i] = string(k)
}
ctx, cancel := context.WithCancel(context.TODO())
return &watchedKeys{
watched: watched,
numWatchers: make(map[string]int),
ctx: ctx,
cancel: cancel,
}
}
func benchPutWatches(clients []*clientv3.Client, wk *watchedKeys) {
eventsTotal := 0
for i := 0; i < watchPutTotal; i++ {
eventsTotal += wk.numWatchers[wk.watched[i%len(wk.watched)]]
}
bar = pb.New(eventsTotal)
bar.Format("Bom !")
bar.Start()
r := newReport()
wg.Add(len(wk.watches))
nrRxed := int32(eventsTotal)
for _, w := range wk.watches {
go func(wc clientv3.WatchChan) {
defer wg.Done()
recvWatchChan(wc, r.Results(), &nrRxed)
wk.cancel()
}(w)
}
putreqc := make(chan clientv3.Op, len(clients))
go func() {
defer close(putreqc)
for i := 0; i < watchPutTotal; i++ {
putreqc <- clientv3.OpPut(wk.watched[i%(len(wk.watched))], "data")
}
}()
limit := rate.NewLimiter(rate.Limit(watchPutRate), 1)
for _, cc := range clients {
go func(c *clientv3.Client) {
for op := range putreqc {
if err := limit.Wait(context.TODO()); err != nil {
panic(err)
}
if _, err := c.Do(context.TODO(), op); err != nil {
panic(err)
}
}
}(cc)
}
rc := r.Run()
wg.Wait()
bar.Finish()
close(r.Results())
fmt.Printf("Watch events received summary:\n%s", <-rc)
}
func recvWatchChan(wch clientv3.WatchChan, results chan<- report.Result, nrRxed *int32) {
for r := range wch {
st := time.Now()
for range r.Events {
results <- report.Result{Start: st, End: time.Now()}
bar.Increment()
if atomic.AddInt32(nrRxed, -1) <= 0 {
return
}
}
}
}
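Note that --put-rate feeds the rate limiter directly, so a run normally sets it to a nonzero value; a plausible invocation (values illustrative) is:
```
$ benchmark watch --streams=32 --watch-per-stream=64 --watched-key-total=16 --put-rate=100 --put-total=5000
```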

View File

@ -1,118 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"context"
"fmt"
"sync"
"time"
v3 "github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/pkg/report"
"github.com/spf13/cobra"
"gopkg.in/cheggaaa/pb.v1"
)
// watchGetCmd represents the watch command
var watchGetCmd = &cobra.Command{
Use: "watch-get",
Short: "Benchmark watch with get",
Long: `Benchmark for serialized key gets with many unsynced watchers`,
Run: watchGetFunc,
}
var (
watchGetTotalWatchers int
watchGetTotalStreams int
watchEvents int
firstWatch sync.Once
)
func init() {
RootCmd.AddCommand(watchGetCmd)
watchGetCmd.Flags().IntVar(&watchGetTotalWatchers, "watchers", 10000, "Total number of watchers")
watchGetCmd.Flags().IntVar(&watchGetTotalStreams, "streams", 1, "Total number of watcher streams")
watchGetCmd.Flags().IntVar(&watchEvents, "events", 8, "Number of events per watcher")
}
func watchGetFunc(cmd *cobra.Command, args []string) {
clients := mustCreateClients(totalClients, totalConns)
getClient := mustCreateClients(1, 1)
// setup keys for watchers
watchRev := int64(0)
for i := 0; i < watchEvents; i++ {
v := fmt.Sprintf("%d", i)
resp, err := clients[0].Put(context.TODO(), "watchkey", v)
if err != nil {
panic(err)
}
if i == 0 {
watchRev = resp.Header.Revision
}
}
streams := make([]v3.Watcher, watchGetTotalStreams)
for i := range streams {
streams[i] = v3.NewWatcher(clients[i%len(clients)])
}
bar = pb.New(watchGetTotalWatchers * watchEvents)
bar.Format("Bom !")
bar.Start()
// report from trying to do serialized gets with concurrent watchers
r := newReport()
ctx, cancel := context.WithCancel(context.TODO())
f := func() {
defer close(r.Results())
for {
st := time.Now()
_, err := getClient[0].Get(ctx, "abc", v3.WithSerializable())
if ctx.Err() != nil {
break
}
r.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
}
}
wg.Add(watchGetTotalWatchers)
for i := 0; i < watchGetTotalWatchers; i++ {
go doUnsyncWatch(streams[i%len(streams)], watchRev, f)
}
rc := r.Run()
wg.Wait()
cancel()
bar.Finish()
fmt.Printf("Get during watch summary:\n%s", <-rc)
}
func doUnsyncWatch(stream v3.Watcher, rev int64, f func()) {
defer wg.Done()
wch := stream.Watch(context.TODO(), "watchkey", v3.WithRev(rev))
if wch == nil {
panic("could not open watch channel")
}
firstWatch.Do(func() { go f() })
i := 0
for i < watchEvents {
wev := <-wch
i += len(wev.Events)
bar.Add(len(wev.Events))
}
}
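A plausible invocation stressing unsynced watchers against serializable gets (values illustrative) would be:
```
$ benchmark watch-get --watchers=5000 --streams=4 --events=16
```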

View File

@ -1,111 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"context"
"fmt"
"os"
"sync"
"time"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/pkg/report"
"github.com/spf13/cobra"
"golang.org/x/time/rate"
"gopkg.in/cheggaaa/pb.v1"
)
// watchLatencyCmd represents the watch latency command
var watchLatencyCmd = &cobra.Command{
Use: "watch-latency",
Short: "Benchmark watch latency",
Long: `Benchmarks the latency for watches by measuring
the latency between writing to a key and receiving the
associated watch response.`,
Run: watchLatencyFunc,
}
var (
watchLTotal int
watchLPutRate int
watchLKeySize int
watchLValueSize int
)
func init() {
RootCmd.AddCommand(watchLatencyCmd)
watchLatencyCmd.Flags().IntVar(&watchLTotal, "total", 10000, "Total number of put requests")
watchLatencyCmd.Flags().IntVar(&watchLPutRate, "put-rate", 100, "Number of keys to put per second")
watchLatencyCmd.Flags().IntVar(&watchLKeySize, "key-size", 32, "Key size of watch response")
watchLatencyCmd.Flags().IntVar(&watchLValueSize, "val-size", 32, "Value size of watch response")
}
func watchLatencyFunc(cmd *cobra.Command, args []string) {
key := string(mustRandBytes(watchLKeySize))
value := string(mustRandBytes(watchLValueSize))
clients := mustCreateClients(totalClients, totalConns)
putClient := mustCreateConn()
wchs := make([]clientv3.WatchChan, len(clients))
for i := range wchs {
wchs[i] = clients[i].Watch(context.TODO(), key)
}
bar = pb.New(watchLTotal)
bar.Format("Bom !")
bar.Start()
limiter := rate.NewLimiter(rate.Limit(watchLPutRate), watchLPutRate)
r := newReport()
rc := r.Run()
for i := 0; i < watchLTotal; i++ {
// rate-limit puts according to --put-rate
if err := limiter.Wait(context.TODO()); err != nil {
break
}
var st time.Time
var wg sync.WaitGroup
wg.Add(len(clients))
barrierc := make(chan struct{})
for _, wch := range wchs {
ch := wch
go func() {
<-barrierc
<-ch
r.Results() <- report.Result{Start: st, End: time.Now()}
wg.Done()
}()
}
if _, err := putClient.Put(context.TODO(), key, value); err != nil {
fmt.Fprintf(os.Stderr, "Failed to Put for watch latency benchmark: %v\n", err)
os.Exit(1)
}
st = time.Now()
close(barrierc)
wg.Wait()
bar.Increment()
}
close(r.Results())
bar.Finish()
fmt.Printf("%s", <-rc)
}
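A plausible invocation measuring put-to-event latency (values illustrative) would be:
```
$ benchmark watch-latency --total=2000 --put-rate=200 --key-size=16 --val-size=64
```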

View File

@ -1,16 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// benchmark is a program for benchmarking etcd v3 API performance.
package main

View File

@ -1,29 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"os"
"github.com/coreos/etcd/tools/benchmark/cmd"
)
func main() {
if err := cmd.RootCmd.Execute(); err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(-1)
}
}

View File

@ -1,74 +0,0 @@
### etcd-dump-db
etcd-dump-db inspects etcd db files.
```
Usage:
etcd-dump-db [command]
Available Commands:
list-bucket    Lists all buckets.
iterate-bucket Lists key-value pairs in reverse order.
hash           Computes the hash of the db file.
Flags:
-h, --help[=false]: help for etcd-dump-db
Use "etcd-dump-db [command] --help" for more information about a command.
```
#### list-bucket [data dir or db file path]
Lists all buckets.
```
$ etcd-dump-db list-bucket agent01/agent.etcd
alarm
auth
authRoles
authUsers
cluster
key
lease
members
members_removed
meta
```
#### hash [data dir or db file path]
Computes the hash of the db file.
```
$ etcd-dump-db hash agent01/agent.etcd
db path: agent01/agent.etcd/member/snap/db
Hash: 3700260467
$ etcd-dump-db hash agent02/agent.etcd
db path: agent02/agent.etcd/member/snap/db
Hash: 3700260467
$ etcd-dump-db hash agent03/agent.etcd
db path: agent03/agent.etcd/member/snap/db
Hash: 3700260467
```
#### iterate-bucket [data dir or db file path]
Lists key-value pairs in reverse order.
```
$ etcd-dump-db iterate-bucket agent03/agent.etcd --bucket=key --limit 3
key="\x00\x00\x00\x00\x005@x_\x00\x00\x00\x00\x00\x00\x00\tt", value="\n\x153640412599896088633_9"
key="\x00\x00\x00\x00\x005@x_\x00\x00\x00\x00\x00\x00\x00\bt", value="\n\x153640412599896088633_8"
key="\x00\x00\x00\x00\x005@x_\x00\x00\x00\x00\x00\x00\x00\at", value="\n\x153640412599896088633_7"
```

View File

@ -1,83 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"path/filepath"
bolt "github.com/coreos/bbolt"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/mvcc/backend"
)
func snapDir(dataDir string) string {
return filepath.Join(dataDir, "member", "snap")
}
func getBuckets(dbPath string) (buckets []string, err error) {
db, derr := bolt.Open(dbPath, 0600, &bolt.Options{})
if derr != nil {
return nil, derr
}
defer db.Close()
err = db.View(func(tx *bolt.Tx) error {
return tx.ForEach(func(b []byte, _ *bolt.Bucket) error {
buckets = append(buckets, string(b))
return nil
})
})
return buckets, err
}
func iterateBucket(dbPath, bucket string, limit uint64) (err error) {
db, derr := bolt.Open(dbPath, 0600, &bolt.Options{})
if derr != nil {
return derr
}
defer db.Close()
err = db.View(func(tx *bolt.Tx) error {
b := tx.Bucket([]byte(bucket))
if b == nil {
return fmt.Errorf("got nil bucket for %s", bucket)
}
c := b.Cursor()
// iterate in reverse order (use First() and Next() for ascending order)
for k, v := c.Last(); k != nil; k, v = c.Prev() {
fmt.Printf("key=%q, value=%q\n", k, v)
limit--
if limit == 0 {
break
}
}
return nil
})
return err
}
func getHash(dbPath string) (hash uint32, err error) {
b := backend.NewDefaultBackend(dbPath)
return b.Hash(mvcc.DefaultIgnores)
}
// TODO: revert by revision and find specified hash value
// currently, it's hard because lease is in separate bucket
// and does not modify revision

View File

@ -1,16 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// etcd-dump-db inspects etcd db files.
package main

View File

@ -1,130 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"log"
"os"
"path/filepath"
"strings"
"github.com/spf13/cobra"
)
var (
rootCommand = &cobra.Command{
Use: "etcd-dump-db",
Short: "etcd-dump-db inspects etcd db files.",
}
listBucketCommand = &cobra.Command{
Use: "list-bucket [data dir or db file path]",
Short: "bucket lists all buckets.",
Run: listBucketCommandFunc,
}
iterateBucketCommand = &cobra.Command{
Use: "iterate-bucket [data dir or db file path]",
Short: "iterate-bucket lists key-value pairs in reverse order.",
Run: iterateBucketCommandFunc,
}
getHashCommand = &cobra.Command{
Use: "hash [data dir or db file path]",
Short: "hash computes the hash of db file.",
Run: getHashCommandFunc,
}
)
var (
iterateBucketName string
iterateBucketLimit uint64
)
func init() {
iterateBucketCommand.PersistentFlags().StringVar(&iterateBucketName, "bucket", "", "bucket name to iterate")
iterateBucketCommand.PersistentFlags().Uint64Var(&iterateBucketLimit, "limit", 0, "max number of key-value pairs to iterate (0 to iterate all)")
rootCommand.AddCommand(listBucketCommand)
rootCommand.AddCommand(iterateBucketCommand)
rootCommand.AddCommand(getHashCommand)
}
func main() {
if err := rootCommand.Execute(); err != nil {
fmt.Fprintln(os.Stdout, err)
os.Exit(1)
}
}
func listBucketCommandFunc(cmd *cobra.Command, args []string) {
if len(args) < 1 {
log.Fatalf("Must provide at least 1 argument (got %v)", args)
}
dp := args[0]
if !strings.HasSuffix(dp, "db") {
dp = filepath.Join(snapDir(dp), "db")
}
if !existFileOrDir(dp) {
log.Fatalf("%q does not exist", dp)
}
bts, err := getBuckets(dp)
if err != nil {
log.Fatal(err)
}
for _, b := range bts {
fmt.Println(b)
}
}
func iterateBucketCommandFunc(cmd *cobra.Command, args []string) {
if len(args) < 1 {
log.Fatalf("Must provide at least 1 argument (got %v)", args)
}
dp := args[0]
if !strings.HasSuffix(dp, "db") {
dp = filepath.Join(snapDir(dp), "db")
}
if !existFileOrDir(dp) {
log.Fatalf("%q does not exist", dp)
}
if iterateBucketName == "" {
log.Fatal("got empty bucket name")
}
err := iterateBucket(dp, iterateBucketName, iterateBucketLimit)
if err != nil {
log.Fatal(err)
}
}
func getHashCommandFunc(cmd *cobra.Command, args []string) {
if len(args) < 1 {
log.Fatalf("Must provide at least 1 argument (got %v)", args)
}
dp := args[0]
if !strings.HasSuffix(dp, "db") {
dp = filepath.Join(snapDir(dp), "db")
}
if !existFileOrDir(dp) {
log.Fatalf("%q does not exist", dp)
}
hash, err := getHash(dp)
if err != nil {
log.Fatal(err)
}
fmt.Printf("db path: %s\nHash: %d\n", dp, hash)
}

View File

@ -1,22 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import "os"
func existFileOrDir(name string) bool {
_, err := os.Stat(name)
return err == nil
}

View File

@ -1,16 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// etcd-dump-logs is a program for analyzing etcd server write ahead logs.
package main

View File

@ -1,162 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"flag"
"fmt"
"log"
"path/filepath"
"time"
"github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/pkg/pbutil"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/snap"
"github.com/coreos/etcd/wal"
"github.com/coreos/etcd/wal/walpb"
)
func main() {
from := flag.String("data-dir", "", "")
snapfile := flag.String("start-snap", "", "The base name of snapshot file to start dumping")
index := flag.Uint64("start-index", 0, "The index to start dumping")
flag.Parse()
if *from == "" {
log.Fatal("Must provide -data-dir flag.")
}
if *snapfile != "" && *index != 0 {
log.Fatal("start-snap and start-index flags cannot be used together.")
}
var (
walsnap walpb.Snapshot
snapshot *raftpb.Snapshot
err error
)
isIndex := *index != 0
if isIndex {
fmt.Printf("Start dumping log entries from index %d.\n", *index)
walsnap.Index = *index
} else {
if *snapfile == "" {
ss := snap.New(snapDir(*from))
snapshot, err = ss.Load()
} else {
snapshot, err = snap.Read(filepath.Join(snapDir(*from), *snapfile))
}
switch err {
case nil:
walsnap.Index, walsnap.Term = snapshot.Metadata.Index, snapshot.Metadata.Term
nodes := genIDSlice(snapshot.Metadata.ConfState.Nodes)
fmt.Printf("Snapshot:\nterm=%d index=%d nodes=%s\n",
walsnap.Term, walsnap.Index, nodes)
case snap.ErrNoSnapshot:
fmt.Printf("Snapshot:\nempty\n")
default:
log.Fatalf("Failed loading snapshot: %v", err)
}
fmt.Println("Start dupmping log entries from snapshot.")
}
w, err := wal.OpenForRead(walDir(*from), walsnap)
if err != nil {
log.Fatalf("Failed opening WAL: %v", err)
}
wmetadata, state, ents, err := w.ReadAll()
w.Close()
if err != nil && (!isIndex || err != wal.ErrSnapshotNotFound) {
log.Fatalf("Failed reading WAL: %v", err)
}
id, cid := parseWALMetadata(wmetadata)
vid := types.ID(state.Vote)
fmt.Printf("WAL metadata:\nnodeID=%s clusterID=%s term=%d commitIndex=%d vote=%s\n",
id, cid, state.Term, state.Commit, vid)
fmt.Printf("WAL entries:\n")
fmt.Printf("lastIndex=%d\n", ents[len(ents)-1].Index)
fmt.Printf("%4s\t%10s\ttype\tdata\n", "term", "index")
for _, e := range ents {
msg := fmt.Sprintf("%4d\t%10d", e.Term, e.Index)
switch e.Type {
case raftpb.EntryNormal:
msg = fmt.Sprintf("%s\tnorm", msg)
var rr etcdserverpb.InternalRaftRequest
if err := rr.Unmarshal(e.Data); err == nil {
msg = fmt.Sprintf("%s\t%s", msg, rr.String())
break
}
var r etcdserverpb.Request
if err := r.Unmarshal(e.Data); err == nil {
switch r.Method {
case "":
msg = fmt.Sprintf("%s\tnoop", msg)
case "SYNC":
msg = fmt.Sprintf("%s\tmethod=SYNC time=%q", msg, time.Unix(0, r.Time))
case "QGET", "DELETE":
msg = fmt.Sprintf("%s\tmethod=%s path=%s", msg, r.Method, excerpt(r.Path, 64, 64))
default:
msg = fmt.Sprintf("%s\tmethod=%s path=%s val=%s", msg, r.Method, excerpt(r.Path, 64, 64), excerpt(r.Val, 128, 0))
}
break
}
msg = fmt.Sprintf("%s\t???", msg)
case raftpb.EntryConfChange:
msg = fmt.Sprintf("%s\tconf", msg)
var r raftpb.ConfChange
if err := r.Unmarshal(e.Data); err != nil {
msg = fmt.Sprintf("%s\t???", msg)
} else {
msg = fmt.Sprintf("%s\tmethod=%s id=%s", msg, r.Type, types.ID(r.NodeID))
}
}
fmt.Println(msg)
}
}
func walDir(dataDir string) string { return filepath.Join(dataDir, "member", "wal") }
func snapDir(dataDir string) string { return filepath.Join(dataDir, "member", "snap") }
func parseWALMetadata(b []byte) (id, cid types.ID) {
var metadata etcdserverpb.Metadata
pbutil.MustUnmarshal(&metadata, b)
id = types.ID(metadata.NodeID)
cid = types.ID(metadata.ClusterID)
return id, cid
}
func genIDSlice(a []uint64) []types.ID {
ids := make([]types.ID, len(a))
for i, id := range a {
ids[i] = types.ID(id)
}
return ids
}
// excerpt replaces middle part with ellipsis and returns a double-quoted
// string safely escaped with Go syntax.
func excerpt(str string, pre, suf int) string {
if pre+suf > len(str) {
return fmt.Sprintf("%q", str)
}
return fmt.Sprintf("%q...%q", str[:pre], str[len(str)-suf:])
}
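For reference, plausible invocations (the data directory path is a placeholder) would be:
```
$ etcd-dump-logs --data-dir=default.etcd
$ etcd-dump-logs --data-dir=default.etcd --start-index=42
```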

View File

@ -1,47 +0,0 @@
# etcd functional test suite
etcd functional test suite tests the functionality of an etcd cluster with a focus on failure resistance under high pressure. It sets up an etcd cluster, injects failures by killing processes or isolating their network, and expects the cluster to recover within a short amount of time after the fault is removed.
The suite has two components: etcd-agent and etcd-tester. etcd-agent runs on every test machine, while etcd-tester is the single controller of the test. etcd-tester directs all the etcd-agents to start etcd clusters and simulates various failure cases.
## requirements
The environment of the cluster must be stable enough that the test suite can assume most observed failures are the ones it generated itself.
## etcd agent
etcd agent is a daemon running on each machine. It can start, stop, restart, isolate, and terminate an etcd process. The agent exposes this functionality via HTTP RPC.
## etcd tester
etcd functional tester controls the progress of the functional tests. It calls the etcd agents' RPCs to simulate various test cases. For example, it can start a three-member cluster by sending start RPC calls to three different etcd agents, and it can fail one member by sending a stop RPC call to that member's agent.
### Run locally
```
$ PASSES=functional ./test
```
### Run with Docker
First build the tester image:
```bash
pushd ../..
GO_VERSION=1.9.3 \
make build-docker-functional-tester \
-f ./hack/scripts-dev/Makefile
popd
```
Then run the [example scripts](./scripts):
```bash
./scripts/agent-1.sh
./scripts/agent-2.sh
./scripts/agent-3.sh
./scripts/tester-limit.sh
```

View File

@ -1,11 +0,0 @@
#!/usr/bin/env bash
if ! [[ "$0" =~ "tools/functional-tester/build" ]]; then
echo "must be run from repository root"
exit 255
fi
CGO_ENABLED=0 go build -a -installsuffix cgo -ldflags "-s" -o bin/etcd-agent ./cmd/tools/functional-tester/etcd-agent
CGO_ENABLED=0 go build -a -installsuffix cgo -ldflags "-s" -o bin/etcd-tester ./cmd/tools/functional-tester/etcd-tester
CGO_ENABLED=0 go build -a -installsuffix cgo -ldflags "-s" -o bin/etcd-runner ./cmd/tools/functional-tester/etcd-runner

View File

@ -1,372 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"net"
"net/url"
"os"
"os/exec"
"path/filepath"
"strconv"
"sync"
"syscall"
"time"
"github.com/coreos/etcd/pkg/fileutil"
"github.com/coreos/etcd/pkg/transport"
"github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
)
const (
stateUninitialized = "uninitialized"
stateStarted = "started"
stateStopped = "stopped"
stateTerminated = "terminated"
)
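// Observed transitions: start moves the agent to "started", stopWithSig to
// "stopped", cleanup back to "uninitialized", and terminate to "terminated".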
type Agent struct {
state string // the state of etcd process
cmd *exec.Cmd
logfile *os.File
cfg AgentConfig
pmu sync.Mutex
advertisePortToProxy map[int]transport.Proxy
}
type AgentConfig struct {
EtcdPath string
LogDir string
FailpointAddr string
}
func newAgent(cfg AgentConfig) (*Agent, error) {
// check if the file exists
_, err := os.Stat(cfg.EtcdPath)
if err != nil {
return nil, err
}
c := exec.Command(cfg.EtcdPath)
err = fileutil.TouchDirAll(cfg.LogDir)
if err != nil {
return nil, err
}
var f *os.File
f, err = os.Create(filepath.Join(cfg.LogDir, "etcd.log"))
if err != nil {
return nil, err
}
return &Agent{
state: stateUninitialized,
cmd: c,
logfile: f,
cfg: cfg,
advertisePortToProxy: make(map[int]transport.Proxy),
}, nil
}
// start starts a new etcd process with the given args.
func (a *Agent) start(args ...string) error {
args = append(args, "--data-dir", a.dataDir())
a.cmd = exec.Command(a.cmd.Path, args...)
a.cmd.Env = []string{"GOFAIL_HTTP=" + a.cfg.FailpointAddr}
a.cmd.Stdout = a.logfile
a.cmd.Stderr = a.logfile
err := a.cmd.Start()
if err != nil {
return err
}
a.state = stateStarted
a.pmu.Lock()
defer a.pmu.Unlock()
if len(a.advertisePortToProxy) == 0 {
// give etcd enough time to start before setting up the proxies
time.Sleep(time.Second)
var (
err error
s string
listenClientURL *url.URL
advertiseClientURL *url.URL
advertiseClientURLPort int
listenPeerURL *url.URL
advertisePeerURL *url.URL
advertisePeerURLPort int
)
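// Parse advertise/listen URL pairs from the etcd flags; the agent proxies
// traffic from the advertised URLs to the listen URLs so it can later
// inject per-port faults such as blackholes and added latency.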
for i := range args {
switch args[i] {
case "--listen-client-urls":
listenClientURL, err = url.Parse(args[i+1])
if err != nil {
return err
}
case "--advertise-client-urls":
advertiseClientURL, err = url.Parse(args[i+1])
if err != nil {
return err
}
_, s, err = net.SplitHostPort(advertiseClientURL.Host)
if err != nil {
return err
}
advertiseClientURLPort, err = strconv.Atoi(s)
if err != nil {
return err
}
case "--listen-peer-urls":
listenPeerURL, err = url.Parse(args[i+1])
if err != nil {
return err
}
case "--initial-advertise-peer-urls":
advertisePeerURL, err = url.Parse(args[i+1])
if err != nil {
return err
}
_, s, err = net.SplitHostPort(advertisePeerURL.Host)
if err != nil {
return err
}
advertisePeerURLPort, err = strconv.Atoi(s)
if err != nil {
return err
}
}
}
clientProxy := transport.NewProxy(transport.ProxyConfig{
From: *advertiseClientURL,
To: *listenClientURL,
})
select {
case err = <-clientProxy.Error():
return err
case <-time.After(time.Second):
}
a.advertisePortToProxy[advertiseClientURLPort] = clientProxy
peerProxy := transport.NewProxy(transport.ProxyConfig{
From: *advertisePeerURL,
To: *listenPeerURL,
})
select {
case err = <-peerProxy.Error():
return err
case <-time.After(time.Second):
}
a.advertisePortToProxy[advertisePeerURLPort] = peerProxy
}
return nil
}
// stopWithSig stops the etcd process the agent started by sending it the given signal.
func (a *Agent) stopWithSig(sig os.Signal) error {
if a.state != stateStarted {
return nil
}
a.pmu.Lock()
if len(a.advertisePortToProxy) > 0 {
for _, p := range a.advertisePortToProxy {
if err := p.Close(); err != nil {
a.pmu.Unlock()
return err
}
select {
case <-p.Done():
// enough time to release port
time.Sleep(time.Second)
case <-time.After(time.Second):
}
}
a.advertisePortToProxy = make(map[int]transport.Proxy)
}
a.pmu.Unlock()
err := stopWithSig(a.cmd, sig)
if err != nil {
return err
}
a.state = stateStopped
return nil
}
func stopWithSig(cmd *exec.Cmd, sig os.Signal) error {
err := cmd.Process.Signal(sig)
if err != nil {
return err
}
errc := make(chan error)
go func() {
_, ew := cmd.Process.Wait()
errc <- ew
close(errc)
}()
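// wait up to 5 seconds for a graceful exit; force-kill on timeout, then reap the exit status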
select {
case <-time.After(5 * time.Second):
cmd.Process.Kill()
case e := <-errc:
return e
}
err = <-errc
return err
}
// restart restarts the stopped etcd process.
func (a *Agent) restart() error {
return a.start(a.cmd.Args[1:]...)
}
func (a *Agent) cleanup() error {
// exit with stacktrace
if err := a.stopWithSig(syscall.SIGQUIT); err != nil {
return err
}
a.state = stateUninitialized
a.logfile.Close()
if err := archiveLogAndDataDir(a.cfg.LogDir, a.dataDir()); err != nil {
return err
}
if err := fileutil.TouchDirAll(a.cfg.LogDir); err != nil {
return err
}
f, err := os.Create(filepath.Join(a.cfg.LogDir, "etcd.log"))
if err != nil {
return err
}
a.logfile = f
// https://www.kernel.org/doc/Documentation/sysctl/vm.txt
// https://github.com/torvalds/linux/blob/master/fs/drop_caches.c
cmd := exec.Command("/bin/sh", "-c", `echo "echo 1 > /proc/sys/vm/drop_caches" | sudo sh`)
if err := cmd.Run(); err != nil {
plog.Infof("error when cleaning page cache (%v)", err)
}
return nil
}
// terminate stops the existing etcd process the agent started
// and removes the data dir.
func (a *Agent) terminate() error {
err := a.stopWithSig(syscall.SIGTERM)
if err != nil {
return err
}
err = os.RemoveAll(a.dataDir())
if err != nil {
return err
}
a.state = stateTerminated
return nil
}
func (a *Agent) dropPort(port int) error {
a.pmu.Lock()
defer a.pmu.Unlock()
p, ok := a.advertisePortToProxy[port]
if !ok {
return fmt.Errorf("%d does not have proxy", port)
}
p.BlackholeTx()
p.BlackholeRx()
return nil
}
func (a *Agent) recoverPort(port int) error {
a.pmu.Lock()
defer a.pmu.Unlock()
p, ok := a.advertisePortToProxy[port]
if !ok {
return fmt.Errorf("%d does not have proxy", port)
}
p.UnblackholeTx()
p.UnblackholeRx()
return nil
}
func (a *Agent) setLatency(ms, rv int) error {
a.pmu.Lock()
defer a.pmu.Unlock()
if ms == 0 {
for _, p := range a.advertisePortToProxy {
p.UndelayTx()
p.UndelayRx()
}
}
for _, p := range a.advertisePortToProxy {
p.DelayTx(time.Duration(ms)*time.Millisecond, time.Duration(rv)*time.Millisecond)
p.DelayRx(time.Duration(ms)*time.Millisecond, time.Duration(rv)*time.Millisecond)
}
return nil
}
func (a *Agent) status() client.Status {
return client.Status{State: a.state}
}
func (a *Agent) dataDir() string {
return filepath.Join(a.cfg.LogDir, "etcd.data")
}
func existDir(fpath string) bool {
st, err := os.Stat(fpath)
if err != nil {
if os.IsNotExist(err) {
return false
}
} else {
return st.IsDir()
}
return false
}
func archiveLogAndDataDir(logDir string, datadir string) error {
dir := filepath.Join(logDir, "failure_archive", time.Now().Format(time.RFC3339))
if existDir(dir) {
dir = filepath.Join(logDir, "failure_archive", time.Now().Add(time.Second).Format(time.RFC3339))
}
if err := fileutil.TouchDirAll(dir); err != nil {
return err
}
if err := os.Rename(filepath.Join(logDir, "etcd.log"), filepath.Join(dir, "etcd.log")); err != nil {
if !os.IsNotExist(err) {
return err
}
}
if err := os.Rename(datadir, filepath.Join(dir, filepath.Base(datadir))); err != nil {
if !os.IsNotExist(err) {
return err
}
}
return nil
}

View File

@ -1,87 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"os"
"path/filepath"
"syscall"
"testing"
)
var etcdPath = filepath.Join(os.Getenv("GOPATH"), "bin/etcd")
func TestAgentStart(t *testing.T) {
defer os.Remove("etcd.log")
a := newTestAgent(t)
defer a.terminate()
err := a.start()
if err != nil {
t.Fatal(err)
}
}
func TestAgentRestart(t *testing.T) {
defer os.Remove("etcd.log")
a := newTestAgent(t)
defer a.terminate()
err := a.start()
if err != nil {
t.Fatal(err)
}
err = a.stopWithSig(syscall.SIGTERM)
if err != nil {
t.Fatal(err)
}
err = a.restart()
if err != nil {
t.Fatal(err)
}
}
func TestAgentTerminate(t *testing.T) {
defer os.Remove("etcd.log")
a := newTestAgent(t)
err := a.start()
if err != nil {
t.Fatal(err)
}
err = a.terminate()
if err != nil {
t.Fatal(err)
}
if _, err := os.Stat(a.dataDir()); !os.IsNotExist(err) {
t.Fatal(err)
}
}
// newTestAgent creates a test agent
func newTestAgent(t *testing.T) *Agent {
a, err := newAgent(AgentConfig{EtcdPath: etcdPath, LogDir: "etcd.log"})
if err != nil {
t.Fatal(err)
}
return a
}

View File

@ -1,118 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package client
import "net/rpc"
type Status struct {
// State gives the human-readable status of an agent (e.g., "started" or "terminated")
State string
// TODO: gather more information
// TODO: memory usage, raft information, etc.
}
type Agent interface {
ID() uint64
// Start starts a new etcd with the given args on the agent machine.
Start(args ...string) (int, error)
// Stop stops the existing etcd the agent started.
Stop() error
// Restart restarts the existing etcd the agent stopped.
Restart() (int, error)
// Cleanup stops the existing etcd the agent started, then archives the log and its data dir.
Cleanup() error
// Terminate stops the existing etcd the agent started and removes its data dir.
Terminate() error
// DropPort drops all network packets at the given port.
DropPort(port int) error
// RecoverPort stops dropping all network packets at the given port.
RecoverPort(port int) error
// SetLatency slows down network by introducing latency.
SetLatency(ms, rv int) error
// RemoveLatency removes latency introduced by SetLatency.
RemoveLatency() error
// Status returns the status of etcd on the agent
Status() (Status, error)
}
type agent struct {
endpoint string
rpcClient *rpc.Client
}
func NewAgent(endpoint string) (Agent, error) {
c, err := rpc.DialHTTP("tcp", endpoint)
if err != nil {
return nil, err
}
return &agent{endpoint, c}, nil
}
func (a *agent) Start(args ...string) (int, error) {
var pid int
err := a.rpcClient.Call("Agent.RPCStart", args, &pid)
if err != nil {
return -1, err
}
return pid, nil
}
func (a *agent) Stop() error {
return a.rpcClient.Call("Agent.RPCStop", struct{}{}, nil)
}
func (a *agent) Restart() (int, error) {
var pid int
err := a.rpcClient.Call("Agent.RPCRestart", struct{}{}, &pid)
if err != nil {
return -1, err
}
return pid, nil
}
func (a *agent) Cleanup() error {
return a.rpcClient.Call("Agent.RPCCleanup", struct{}{}, nil)
}
func (a *agent) Terminate() error {
return a.rpcClient.Call("Agent.RPCTerminate", struct{}{}, nil)
}
func (a *agent) DropPort(port int) error {
return a.rpcClient.Call("Agent.RPCDropPort", port, nil)
}
func (a *agent) RecoverPort(port int) error {
return a.rpcClient.Call("Agent.RPCRecoverPort", port, nil)
}
func (a *agent) SetLatency(ms, rv int) error {
return a.rpcClient.Call("Agent.RPCSetLatency", []int{ms, rv}, nil)
}
func (a *agent) RemoveLatency() error {
return a.rpcClient.Call("Agent.RPCRemoveLatency", struct{}{}, nil)
}
func (a *agent) Status() (Status, error) {
var s Status
err := a.rpcClient.Call("Agent.RPCStatus", struct{}{}, &s)
return s, err
}
func (a *agent) ID() uint64 {
panic("not implemented")
}

View File

@ -1,16 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package client provides a client implementation to control an etcd-agent.
package client

View File

@ -1,16 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// etcd-agent is a daemon for controlling an etcd process via HTTP RPC.
package main

View File

@ -1,47 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"flag"
"os"
"path/filepath"
"github.com/coreos/pkg/capnslog"
)
var plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "etcd-agent")
func main() {
etcdPath := flag.String("etcd-path", filepath.Join(os.Getenv("GOPATH"), "bin/etcd"), "the path to etcd binary")
etcdLogDir := flag.String("etcd-log-dir", "etcd-log", "directory to store etcd logs, data directories, failure archive")
port := flag.String("port", ":9027", "port to serve agent server")
failpointAddr := flag.String("failpoint-addr", ":2381", "interface for gofail's HTTP server")
flag.Parse()
cfg := AgentConfig{
EtcdPath: *etcdPath,
LogDir: *etcdLogDir,
FailpointAddr: *failpointAddr,
}
a, err := newAgent(cfg)
if err != nil {
plog.Fatal(err)
}
a.serveRPC(*port)
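// block forever on a nil channel; the RPC server keeps serving in the background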
var done chan struct{}
<-done
}

View File

@ -1,131 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"net"
"net/http"
"net/rpc"
"syscall"
"github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
)
func (a *Agent) serveRPC(port string) {
rpc.Register(a)
rpc.HandleHTTP()
l, e := net.Listen("tcp", port)
if e != nil {
plog.Fatal(e)
}
plog.Println("agent listening on", port)
go http.Serve(l, nil)
}
func (a *Agent) RPCStart(args []string, pid *int) error {
plog.Printf("start etcd with args %v", args)
err := a.start(args...)
if err != nil {
plog.Println("error starting etcd", err)
return err
}
*pid = a.cmd.Process.Pid
return nil
}
func (a *Agent) RPCStop(args struct{}, reply *struct{}) error {
plog.Printf("stop etcd")
err := a.stopWithSig(syscall.SIGTERM)
if err != nil {
plog.Println("error stopping etcd", err)
return err
}
return nil
}
func (a *Agent) RPCRestart(args struct{}, pid *int) error {
plog.Printf("restart etcd")
err := a.restart()
if err != nil {
plog.Println("error restarting etcd", err)
return err
}
*pid = a.cmd.Process.Pid
return nil
}
func (a *Agent) RPCCleanup(args struct{}, reply *struct{}) error {
plog.Printf("cleanup etcd")
err := a.cleanup()
if err != nil {
plog.Println("error cleaning up etcd", err)
return err
}
return nil
}
func (a *Agent) RPCTerminate(args struct{}, reply *struct{}) error {
plog.Printf("terminate etcd")
err := a.terminate()
if err != nil {
plog.Println("error terminating etcd", err)
}
return nil
}
func (a *Agent) RPCDropPort(port int, reply *struct{}) error {
plog.Printf("drop port %d", port)
err := a.dropPort(port)
if err != nil {
plog.Println("error dropping port", err)
}
return nil
}
func (a *Agent) RPCRecoverPort(port int, reply *struct{}) error {
plog.Printf("recover port %d", port)
err := a.recoverPort(port)
if err != nil {
plog.Println("error recovering port", err)
}
return nil
}
func (a *Agent) RPCSetLatency(args []int, reply *struct{}) error {
if len(args) != 2 {
return fmt.Errorf("SetLatency needs two args, got (%v)", args)
}
plog.Printf("set latency of %dms (+/- %dms)", args[0], args[1])
err := a.setLatency(args[0], args[1])
if err != nil {
plog.Println("error setting latency", err)
}
return nil
}
func (a *Agent) RPCRemoveLatency(args struct{}, reply *struct{}) error {
plog.Println("removing latency")
err := a.setLatency(0, 0)
if err != nil {
plog.Println("error removing latency")
}
return nil
}
func (a *Agent) RPCStatus(args struct{}, status *client.Status) error {
*status = a.status()
return nil
}

View File

@ -1,166 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"io/ioutil"
"log"
"net/rpc"
"os"
"testing"
"github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
)
func init() {
defaultAgent, err := newAgent(AgentConfig{EtcdPath: etcdPath, LogDir: "etcd.log"})
if err != nil {
log.Panic(err)
}
defaultAgent.serveRPC(":9027")
}
func TestRPCStart(t *testing.T) {
c, err := rpc.DialHTTP("tcp", ":9027")
if err != nil {
t.Fatal(err)
}
dir, err := ioutil.TempDir(os.TempDir(), "etcd-agent")
if err != nil {
t.Fatal(err)
}
var pid int
err = c.Call("Agent.RPCStart", []string{"--data-dir", dir}, &pid)
if err != nil {
t.Fatal(err)
}
defer c.Call("Agent.RPCTerminate", struct{}{}, nil)
_, err = os.FindProcess(pid)
if err != nil {
t.Errorf("unexpected error %v when find process %d", err, pid)
}
}
func TestRPCRestart(t *testing.T) {
c, err := rpc.DialHTTP("tcp", ":9027")
if err != nil {
t.Fatal(err)
}
dir, err := ioutil.TempDir(os.TempDir(), "etcd-agent")
if err != nil {
t.Fatal(err)
}
var pid int
err = c.Call("Agent.RPCStart", []string{"--data-dir", dir}, &pid)
if err != nil {
t.Fatal(err)
}
defer c.Call("Agent.RPCTerminate", struct{}{}, nil)
err = c.Call("Agent.RPCStop", struct{}{}, nil)
if err != nil {
t.Fatal(err)
}
var npid int
err = c.Call("Agent.RPCRestart", struct{}{}, &npid)
if err != nil {
t.Fatal(err)
}
if npid == pid {
t.Errorf("pid = %v, want not equal to %d", npid, pid)
}
s, err := os.FindProcess(pid)
if err != nil {
t.Errorf("unexpected error %v when find process %d", err, pid)
}
_, err = s.Wait()
if err == nil {
t.Errorf("err = nil, want killed error")
}
_, err = os.FindProcess(npid)
if err != nil {
t.Errorf("unexpected error %v when find process %d", err, npid)
}
}
func TestRPCTerminate(t *testing.T) {
c, err := rpc.DialHTTP("tcp", ":9027")
if err != nil {
t.Fatal(err)
}
dir, err := ioutil.TempDir(os.TempDir(), "etcd-agent")
if err != nil {
t.Fatal(err)
}
var pid int
err = c.Call("Agent.RPCStart", []string{"--data-dir", dir}, &pid)
if err != nil {
t.Fatal(err)
}
err = c.Call("Agent.RPCTerminate", struct{}{}, nil)
if err != nil {
t.Fatal(err)
}
if _, err := os.Stat(dir); !os.IsNotExist(err) {
t.Fatal(err)
}
}
func TestRPCStatus(t *testing.T) {
c, err := rpc.DialHTTP("tcp", ":9027")
if err != nil {
t.Fatal(err)
}
var s client.Status
err = c.Call("Agent.RPCStatus", struct{}{}, &s)
if err != nil {
t.Fatal(err)
}
if s.State != stateTerminated {
t.Errorf("state = %s, want %s", s.State, stateTerminated)
}
dir, err := ioutil.TempDir(os.TempDir(), "etcd-agent")
if err != nil {
t.Fatal(err)
}
var pid int
err = c.Call("Agent.RPCStart", []string{"--data-dir", dir}, &pid)
if err != nil {
t.Fatal(err)
}
err = c.Call("Agent.RPCStatus", struct{}{}, &s)
if err != nil {
t.Fatal(err)
}
if s.State != stateStarted {
t.Errorf("state = %s, want %s", s.State, stateStarted)
}
err = c.Call("Agent.RPCTerminate", struct{}{}, nil)
if err != nil {
t.Fatal(err)
}
}

View File

@ -1,144 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package command
import (
"context"
"errors"
"fmt"
"github.com/coreos/etcd/clientv3/concurrency"
"github.com/spf13/cobra"
)
// NewElectionCommand returns the cobra command for "election runner".
func NewElectionCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "election [election name (defaults to 'elector')]",
Short: "Performs election operation",
Run: runElectionFunc,
}
cmd.Flags().IntVar(&totalClientConnections, "total-client-connections", 10, "total number of client connections")
return cmd
}
func runElectionFunc(cmd *cobra.Command, args []string) {
election := "elector"
if len(args) == 1 {
election = args[0]
}
if len(args) > 1 {
ExitWithError(ExitBadArgs, errors.New("election takes at most one argument"))
}
rcs := make([]roundClient, totalClientConnections)
validatec := make(chan struct{}, len(rcs))
// nextc closes when the election is ready for the next round.
nextc := make(chan struct{})
eps := endpointsFromFlag(cmd)
for i := range rcs {
v := fmt.Sprintf("%d", i)
observedLeader := ""
validateWaiters := 0
var rcNextc chan struct{}
setRcNextc := func() {
rcNextc = nextc
}
rcs[i].c = newClient(eps, dialTimeout)
var (
s *concurrency.Session
err error
)
for {
s, err = concurrency.NewSession(rcs[i].c)
if err == nil {
break
}
}
e := concurrency.NewElection(s, election)
rcs[i].acquire = func() (err error) {
ctx, cancel := context.WithCancel(context.Background())
donec := make(chan struct{})
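// Observe the election concurrently; if another candidate is seen leading,
// cancel the context so the Campaign call below stops blocking.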
go func() {
defer close(donec)
for ctx.Err() == nil {
if ol, ok := <-e.Observe(ctx); ok {
observedLeader = string(ol.Kvs[0].Value)
break
}
}
if observedLeader != v {
cancel()
}
}()
err = e.Campaign(ctx, v)
cancel()
<-donec
if err == nil {
observedLeader = v
}
if observedLeader == v {
validateWaiters = len(rcs)
}
select {
case <-ctx.Done():
return nil
default:
return err
}
}
rcs[i].validate = func() error {
l, err := e.Leader(context.TODO())
if err == nil && string(l.Kvs[0].Value) != observedLeader {
return fmt.Errorf("expected leader %q, got %q", observedLeader, l.Kvs[0].Value)
}
if err != nil {
return err
}
setRcNextc()
validatec <- struct{}{}
return nil
}
rcs[i].release = func() error {
for validateWaiters > 0 {
select {
case <-validatec:
validateWaiters--
default:
return fmt.Errorf("waiting on followers")
}
}
if err := e.Resign(context.TODO()); err != nil {
return err
}
if observedLeader == v {
oldNextc := nextc
nextc = make(chan struct{})
close(oldNextc)
}
<-rcNextc
observedLeader = ""
return nil
}
}
// each client creates 1 key in Campaign() and deletes it in Resign(),
// so a round involves 2*len(rcs) requests.
doRounds(rcs, rounds, 2*len(rcs))
}

View File

@ -1,42 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package command
import (
"fmt"
"os"
"github.com/coreos/etcd/client"
)
const (
// http://tldp.org/LDP/abs/html/exitcodes.html
ExitSuccess = iota
ExitError
ExitBadConnection
ExitInvalidInput // for txn, watch command
ExitBadFeature // provided a valid flag with an unsupported value
ExitInterrupted
ExitIO
ExitBadArgs = 128
)
func ExitWithError(code int, err error) {
fmt.Fprintln(os.Stderr, "Error: ", err)
if cerr, ok := err.(*client.ClusterError); ok {
fmt.Fprintln(os.Stderr, cerr.Detail())
}
os.Exit(code)
}

View File

@ -1,114 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package command
import (
"context"
"fmt"
"log"
"sync"
"time"
"github.com/coreos/etcd/clientv3"
"github.com/spf13/cobra"
"golang.org/x/time/rate"
)
// shared flags
var (
totalClientConnections int // total number of client connections to be made with server
endpoints []string
dialTimeout time.Duration
rounds int // total number of rounds to run; set to <= 0 to run forever.
reqRate int // maximum number of requests per second.
)
type roundClient struct {
c *clientv3.Client
progress int
acquire func() error
validate func() error
release func() error
}
func newClient(eps []string, timeout time.Duration) *clientv3.Client {
c, err := clientv3.New(clientv3.Config{
Endpoints: eps,
DialTimeout: time.Duration(timeout) * time.Second,
})
if err != nil {
log.Fatal(err)
}
return c
}
func doRounds(rcs []roundClient, rounds int, requests int) {
var wg sync.WaitGroup
wg.Add(len(rcs))
finished := make(chan struct{})
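// a single limiter is shared by all clients so the aggregate request rate stays within reqRate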
limiter := rate.NewLimiter(rate.Limit(reqRate), reqRate)
for i := range rcs {
go func(rc *roundClient) {
defer wg.Done()
for rc.progress < rounds || rounds <= 0 {
if err := limiter.WaitN(context.Background(), requests/len(rcs)); err != nil {
log.Panicf("rate limiter error %v", err)
}
for rc.acquire() != nil { /* spin */
}
if err := rc.validate(); err != nil {
log.Fatal(err)
}
time.Sleep(10 * time.Millisecond)
rc.progress++
finished <- struct{}{}
for rc.release() != nil { /* spin */
}
}
}(&rcs[i])
}
start := time.Now()
for i := 1; i < len(rcs)*rounds+1 || rounds <= 0; i++ {
select {
case <-finished:
if i%100 == 0 {
fmt.Printf("finished %d, took %v\n", i, time.Since(start))
start = time.Now()
}
case <-time.After(time.Minute):
log.Panic("no progress after 1 minute!")
}
}
wg.Wait()
for _, rc := range rcs {
rc.c.Close()
}
}
func endpointsFromFlag(cmd *cobra.Command) []string {
endpoints, err := cmd.Flags().GetStringSlice("endpoints")
if err != nil {
ExitWithError(ExitError, err)
}
return endpoints
}

View File

@ -1,174 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// copied from https://github.com/rkt/rkt/blob/master/rkt/help.go
package command
import (
"bytes"
"fmt"
"io"
"os"
"strings"
"text/tabwriter"
"text/template"
"github.com/coreos/etcd/version"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
)
var (
commandUsageTemplate *template.Template
templFuncs = template.FuncMap{
"descToLines": func(s string) []string {
// trim leading/trailing whitespace and split into slice of lines
return strings.Split(strings.Trim(s, "\n\t "), "\n")
},
"cmdName": func(cmd *cobra.Command, startCmd *cobra.Command) string {
parts := []string{cmd.Name()}
for cmd.HasParent() && cmd.Parent().Name() != startCmd.Name() {
cmd = cmd.Parent()
parts = append([]string{cmd.Name()}, parts...)
}
return strings.Join(parts, " ")
},
}
)
func init() {
commandUsage := `
{{ $cmd := .Cmd }}\
{{ $cmdname := cmdName .Cmd .Cmd.Root }}\
NAME:
{{ if not .Cmd.HasParent }}\
{{printf "\t%s - %s" .Cmd.Name .Cmd.Short}}
{{else}}\
{{printf "\t%s - %s" $cmdname .Cmd.Short}}
{{end}}\
USAGE:
{{printf "\t%s" .Cmd.UseLine}}
{{ if not .Cmd.HasParent }}\
VERSION:
{{printf "\t%s" .Version}}
{{end}}\
{{if .Cmd.HasSubCommands}}\
API VERSION:
{{printf "\t%s" .APIVersion}}
{{end}}\
{{if .Cmd.HasSubCommands}}\
COMMANDS:
{{range .SubCommands}}\
{{ $cmdname := cmdName . $cmd }}\
{{ if .Runnable }}\
{{printf "\t%s\t%s" $cmdname .Short}}
{{end}}\
{{end}}\
{{end}}\
{{ if .Cmd.Long }}\
DESCRIPTION:
{{range $line := descToLines .Cmd.Long}}{{printf "\t%s" $line}}
{{end}}\
{{end}}\
{{if .Cmd.HasLocalFlags}}\
OPTIONS:
{{.LocalFlags}}\
{{end}}\
{{if .Cmd.HasInheritedFlags}}\
GLOBAL OPTIONS:
{{.GlobalFlags}}\
{{end}}
`[1:]
commandUsageTemplate = template.Must(template.New("command_usage").Funcs(templFuncs).Parse(strings.Replace(commandUsage, "\\\n", "", -1)))
}
func etcdFlagUsages(flagSet *pflag.FlagSet) string {
x := new(bytes.Buffer)
flagSet.VisitAll(func(flag *pflag.Flag) {
if len(flag.Deprecated) > 0 {
return
}
var format string
if len(flag.Shorthand) > 0 {
format = " -%s, --%s"
} else {
format = " %s --%s"
}
if len(flag.NoOptDefVal) > 0 {
format = format + "["
}
if flag.Value.Type() == "string" {
// put quotes on the value
format = format + "=%q"
} else {
format = format + "=%s"
}
if len(flag.NoOptDefVal) > 0 {
format = format + "]"
}
format = format + "\t%s\n"
shorthand := flag.Shorthand
fmt.Fprintf(x, format, shorthand, flag.Name, flag.DefValue, flag.Usage)
})
return x.String()
}
func getSubCommands(cmd *cobra.Command) []*cobra.Command {
var subCommands []*cobra.Command
for _, subCmd := range cmd.Commands() {
subCommands = append(subCommands, subCmd)
subCommands = append(subCommands, getSubCommands(subCmd)...)
}
return subCommands
}
func usageFunc(cmd *cobra.Command) error {
subCommands := getSubCommands(cmd)
tabOut := getTabOutWithWriter(os.Stdout)
commandUsageTemplate.Execute(tabOut, struct {
Cmd *cobra.Command
LocalFlags string
GlobalFlags string
SubCommands []*cobra.Command
Version string
APIVersion string
}{
cmd,
etcdFlagUsages(cmd.LocalFlags()),
etcdFlagUsages(cmd.InheritedFlags()),
subCommands,
version.Version,
version.APIVersion,
})
tabOut.Flush()
return nil
}
func getTabOutWithWriter(writer io.Writer) *tabwriter.Writer {
aTabOut := new(tabwriter.Writer)
aTabOut.Init(writer, 0, 8, 1, '\t', 0)
return aTabOut
}

View File

@ -1,92 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package command
import (
"context"
"errors"
"fmt"
"log"
"time"
"github.com/coreos/etcd/clientv3"
"github.com/spf13/cobra"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
var (
leaseTTL int64
)
// NewLeaseRenewerCommand returns the cobra command for "lease-renewer runner".
func NewLeaseRenewerCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "lease-renewer",
Short: "Performs lease renew operation",
Run: runLeaseRenewerFunc,
}
cmd.Flags().Int64Var(&leaseTTL, "ttl", 5, "lease's ttl")
return cmd
}
func runLeaseRenewerFunc(cmd *cobra.Command, args []string) {
if len(args) > 0 {
ExitWithError(ExitBadArgs, errors.New("lease-renewer does not take any argument"))
}
eps := endpointsFromFlag(cmd)
c := newClient(eps, dialTimeout)
ctx := context.Background()
for {
var (
l *clientv3.LeaseGrantResponse
lk *clientv3.LeaseKeepAliveResponse
err error
)
for {
l, err = c.Lease.Grant(ctx, leaseTTL)
if err == nil {
break
}
}
expire := time.Now().Add(time.Duration(l.TTL-1) * time.Second)
for {
lk, err = c.Lease.KeepAliveOnce(ctx, l.ID)
ev, _ := status.FromError(err)
if ev.Code() == codes.NotFound {
if time.Since(expire) < 0 {
log.Fatalf("bad renew! exceeded: %v", time.Since(expire))
for {
lk, err = c.Lease.KeepAliveOnce(ctx, l.ID)
fmt.Println(lk, err)
time.Sleep(time.Second)
}
}
log.Fatalf("lost lease %d, expire: %v\n", l.ID, expire)
break
}
if err != nil {
continue
}
expire = time.Now().Add(time.Duration(lk.TTL-1) * time.Second)
log.Printf("renewed lease %d, expire: %v\n", lk.ID, expire)
time.Sleep(time.Duration(lk.TTL-2) * time.Second)
}
}
}

View File

@ -1,94 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package command
import (
"context"
"errors"
"fmt"
"sync"
"github.com/coreos/etcd/clientv3/concurrency"
"github.com/spf13/cobra"
)
// NewLockRacerCommand returns the cobra command for "lock-racer runner".
func NewLockRacerCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "lock-racer [name of lock (defaults to 'racers')]",
Short: "Performs lock race operation",
Run: runRacerFunc,
}
cmd.Flags().IntVar(&totalClientConnections, "total-client-connections", 10, "total number of client connections")
return cmd
}
func runRacerFunc(cmd *cobra.Command, args []string) {
racers := "racers"
if len(args) == 1 {
racers = args[0]
}
if len(args) > 1 {
ExitWithError(ExitBadArgs, errors.New("lock-racer takes at most one argument"))
}
rcs := make([]roundClient, totalClientConnections)
ctx := context.Background()
// mu ensures validate and release funcs are atomic.
var mu sync.Mutex
cnt := 0
eps := endpointsFromFlag(cmd)
for i := range rcs {
var (
s *concurrency.Session
err error
)
rcs[i].c = newClient(eps, dialTimeout)
for {
s, err = concurrency.NewSession(rcs[i].c)
if err == nil {
break
}
}
m := concurrency.NewMutex(s, racers)
rcs[i].acquire = func() error { return m.Lock(ctx) }
rcs[i].validate = func() error {
mu.Lock()
defer mu.Unlock()
if cnt++; cnt != 1 {
return fmt.Errorf("bad lock; count: %d", cnt)
}
return nil
}
rcs[i].release = func() error {
mu.Lock()
defer mu.Unlock()
if err := m.Unlock(ctx); err != nil {
return err
}
cnt = 0
return nil
}
}
// each client creates 1 key in NewMutex() and deletes it in Unlock(),
// so a round involves 2*len(rcs) requests.
doRounds(rcs, rounds, 2*len(rcs))
}

View File

@ -1,70 +0,0 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package command implements individual etcd-runner commands for the etcd-runner utility.
package command
import (
"log"
"math/rand"
"time"
"github.com/spf13/cobra"
)
const (
cliName = "etcd-runner"
cliDescription = "Stress tests using clientv3 functionality.."
defaultDialTimeout = 2 * time.Second
)
var (
rootCmd = &cobra.Command{
Use: cliName,
Short: cliDescription,
SuggestFor: []string{"etcd-runner"},
}
)
func init() {
cobra.EnablePrefixMatching = true
rand.Seed(time.Now().UnixNano())
log.SetFlags(log.Lmicroseconds)
rootCmd.PersistentFlags().StringSliceVar(&endpoints, "endpoints", []string{"127.0.0.1:2379"}, "gRPC endpoints")
rootCmd.PersistentFlags().DurationVar(&dialTimeout, "dial-timeout", defaultDialTimeout, "dial timeout for client connections")
rootCmd.PersistentFlags().IntVar(&reqRate, "req-rate", 30, "maximum number of requests per second")
rootCmd.PersistentFlags().IntVar(&rounds, "rounds", 100, "number of rounds to run; 0 to run forever")
rootCmd.AddCommand(
NewElectionCommand(),
NewLeaseRenewerCommand(),
NewLockRacerCommand(),
NewWatchCommand(),
)
}
func Start() {
rootCmd.SetUsageFunc(usageFunc)
// Make help just show the usage
rootCmd.SetHelpTemplate(`{{.UsageString}}`)
if err := rootCmd.Execute(); err != nil {
ExitWithError(ExitError, err)
}
}

View File

@ -1,210 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package command
import (
"context"
"errors"
"fmt"
"log"
"sync"
"time"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/pkg/stringutil"
"github.com/spf13/cobra"
"golang.org/x/time/rate"
)
var (
runningTime time.Duration // time for which operation should be performed
noOfPrefixes int // total number of prefixes which will be watched upon
watchPerPrefix int // number of watchers per prefix
watchPrefix string // prefix append to keys in watcher
totalKeys int // total number of keys for operation
)
// NewWatchCommand returns the cobra command for "watcher runner".
func NewWatchCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "watcher",
Short: "Performs watch operation",
Run: runWatcherFunc,
}
cmd.Flags().DurationVar(&runningTime, "running-time", 60, "number of seconds to run")
cmd.Flags().StringVar(&watchPrefix, "prefix", "", "the prefix to append on all keys")
cmd.Flags().IntVar(&noOfPrefixes, "total-prefixes", 10, "total no of prefixes to use")
cmd.Flags().IntVar(&watchPerPrefix, "watch-per-prefix", 10, "number of watchers per prefix")
cmd.Flags().IntVar(&totalKeys, "total-keys", 1000, "total number of keys to watch")
return cmd
}
func runWatcherFunc(cmd *cobra.Command, args []string) {
if len(args) > 0 {
ExitWithError(ExitBadArgs, errors.New("watcher does not take any argument"))
}
ctx := context.Background()
for round := 0; round < rounds || rounds <= 0; round++ {
fmt.Println("round", round)
performWatchOnPrefixes(ctx, cmd, round)
}
}
func performWatchOnPrefixes(ctx context.Context, cmd *cobra.Command, round int) {
keyPerPrefix := totalKeys / noOfPrefixes
prefixes := stringutil.UniqueStrings(5, noOfPrefixes)
keys := stringutil.RandomStrings(10, keyPerPrefix)
roundPrefix := fmt.Sprintf("%16x", round)
eps := endpointsFromFlag(cmd)
var (
revision int64
wg sync.WaitGroup
gr *clientv3.GetResponse
err error
)
client := newClient(eps, dialTimeout)
defer client.Close()
gr, err = getKey(ctx, client, "non-existent")
if err != nil {
log.Fatalf("failed to get the initial revision: %v", err)
}
revision = gr.Header.Revision
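// watchers are started from this revision so no events from the puts below are missed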
ctxt, cancel := context.WithDeadline(ctx, time.Now().Add(runningTime))
defer cancel()
// generate and put keys in cluster
limiter := rate.NewLimiter(rate.Limit(reqRate), reqRate)
go func() {
for _, key := range keys {
for _, prefix := range prefixes {
if err = limiter.Wait(ctxt); err != nil {
return
}
if err = putKeyAtMostOnce(ctxt, client, watchPrefix+"-"+roundPrefix+"-"+prefix+"-"+key); err != nil {
log.Fatalf("failed to put key: %v", err)
return
}
}
}
}()
ctxc, cancelc := context.WithCancel(ctx)
wcs := make([]clientv3.WatchChan, 0)
rcs := make([]*clientv3.Client, 0)
for _, prefix := range prefixes {
for j := 0; j < watchPerPrefix; j++ {
rc := newClient(eps, dialTimeout)
rcs = append(rcs, rc)
wprefix := watchPrefix + "-" + roundPrefix + "-" + prefix
wc := rc.Watch(ctxc, wprefix, clientv3.WithPrefix(), clientv3.WithRev(revision))
wcs = append(wcs, wc)
wg.Add(1)
go func() {
defer wg.Done()
checkWatchResponse(wc, wprefix, keys)
}()
}
}
wg.Wait()
cancelc()
// verify all watch channels are closed
for e, wc := range wcs {
if _, ok := <-wc; ok {
log.Fatalf("expected wc to be closed, but received %v", e)
}
}
for _, rc := range rcs {
rc.Close()
}
if err = deletePrefix(ctx, client, watchPrefix); err != nil {
log.Fatalf("failed to clean up keys after test: %v", err)
}
}
func checkWatchResponse(wc clientv3.WatchChan, prefix string, keys []string) {
for n := 0; n < len(keys); {
wr, more := <-wc
if !more {
log.Fatalf("expect more keys (received %d/%d) for %s", n, len(keys), prefix)
}
for _, event := range wr.Events {
expectedKey := prefix + "-" + keys[n]
receivedKey := string(event.Kv.Key)
if expectedKey != receivedKey {
log.Fatalf("expected key %q, got %q for prefix : %q\n", expectedKey, receivedKey, prefix)
}
n++
}
}
}
func putKeyAtMostOnce(ctx context.Context, client *clientv3.Client, key string) error {
gr, err := getKey(ctx, client, key)
if err != nil {
return err
}
var modrev int64
if len(gr.Kvs) > 0 {
modrev = gr.Kvs[0].ModRevision
}
for ctx.Err() == nil {
_, err := client.Txn(ctx).If(clientv3.Compare(clientv3.ModRevision(key), "=", modrev)).Then(clientv3.OpPut(key, key)).Commit()
if err == nil {
return nil
}
}
return ctx.Err()
}
func deletePrefix(ctx context.Context, client *clientv3.Client, key string) error {
for ctx.Err() == nil {
if _, err := client.Delete(ctx, key, clientv3.WithPrefix()); err == nil {
return nil
}
}
return ctx.Err()
}
func getKey(ctx context.Context, client *clientv3.Client, key string) (*clientv3.GetResponse, error) {
for ctx.Err() == nil {
if gr, err := client.Get(ctx, key); err == nil {
return gr, nil
}
}
return nil, ctx.Err()
}

View File

@ -1,16 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// etcd-runner is a program for testing etcd clientv3 features against a fault injected cluster.
package main

View File

@ -1,22 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// etcd-runner is a command line application that performs tests on etcd.
package main
import "github.com/coreos/etcd/tools/functional-tester/etcd-runner/command"
func main() {
command.Start()
}

View File

@ -1,264 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"context"
"fmt"
"time"
"github.com/coreos/etcd/clientv3"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"google.golang.org/grpc"
)
const (
retries = 7
)
type Checker interface {
// Check returns an error if the system fails a consistency check.
Check() error
}
type hashAndRevGetter interface {
getRevisionHash() (revs map[string]int64, hashes map[string]int64, err error)
}
type hashChecker struct {
hrg hashAndRevGetter
}
func newHashChecker(hrg hashAndRevGetter) Checker { return &hashChecker{hrg} }
const leaseCheckerTimeout = 10 * time.Second
func (hc *hashChecker) checkRevAndHashes() (err error) {
var (
revs map[string]int64
hashes map[string]int64
)
// retry in case of transient failures or an etcd cluster that has not yet stabilized.
for i := 0; i < retries; i++ {
revs, hashes, err = hc.hrg.getRevisionHash()
if err != nil {
plog.Warningf("retry %d. failed to retrieve revison and hash (%v)", i, err)
} else {
sameRev := getSameValue(revs)
sameHashes := getSameValue(hashes)
if sameRev && sameHashes {
return nil
}
plog.Warningf("retry %d. etcd cluster is not stable: [revisions: %v] and [hashes: %v]", i, revs, hashes)
}
time.Sleep(time.Second)
}
if err != nil {
return fmt.Errorf("failed revision and hash check (%v)", err)
}
return fmt.Errorf("etcd cluster is not stable: [revisions: %v] and [hashes: %v]", revs, hashes)
}
func (hc *hashChecker) Check() error {
return hc.checkRevAndHashes()
}
type leaseChecker struct {
endpoint string
ls *leaseStresser
leaseClient pb.LeaseClient
kvc pb.KVClient
}
func (lc *leaseChecker) Check() error {
conn, err := grpc.Dial(lc.ls.endpoint, grpc.WithInsecure(), grpc.WithBackoffMaxDelay(1))
if err != nil {
return fmt.Errorf("%v (%s)", err, lc.ls.endpoint)
}
defer func() {
if conn != nil {
conn.Close()
}
}()
lc.kvc = pb.NewKVClient(conn)
lc.leaseClient = pb.NewLeaseClient(conn)
if err := lc.check(true, lc.ls.revokedLeases.leases); err != nil {
return err
}
if err := lc.check(false, lc.ls.aliveLeases.leases); err != nil {
return err
}
return lc.checkShortLivedLeases()
}
// checkShortLivedLeases ensures leases expire.
func (lc *leaseChecker) checkShortLivedLeases() error {
ctx, cancel := context.WithTimeout(context.Background(), leaseCheckerTimeout)
errc := make(chan error)
defer cancel()
for leaseID := range lc.ls.shortLivedLeases.leases {
go func(id int64) {
errc <- lc.checkShortLivedLease(ctx, id)
}(leaseID)
}
var errs []error
for range lc.ls.shortLivedLeases.leases {
if err := <-errc; err != nil {
errs = append(errs, err)
}
}
return errsToError(errs)
}
func (lc *leaseChecker) checkShortLivedLease(ctx context.Context, leaseID int64) (err error) {
// retry in case of a transient failure, or a lease that is expired but not yet revoked because the etcd cluster has not had enough time to delete it.
var resp *pb.LeaseTimeToLiveResponse
for i := 0; i < retries; i++ {
resp, err = lc.getLeaseByID(ctx, leaseID)
// lease not found; for ~v3.1 compatibility, also check ErrLeaseNotFound
if (err == nil && resp.TTL == -1) || (err != nil && rpctypes.Error(err) == rpctypes.ErrLeaseNotFound) {
return nil
}
if err != nil {
plog.Debugf("retry %d. failed to retrieve lease %v error (%v)", i, leaseID, err)
continue
}
if resp.TTL > 0 {
plog.Debugf("lease %v is not expired. sleep for %d until it expires.", leaseID, resp.TTL)
time.Sleep(time.Duration(resp.TTL) * time.Second)
} else {
plog.Debugf("retry %d. lease %v is expired but not yet revoked", i, leaseID)
time.Sleep(time.Second)
}
if err = lc.checkLease(ctx, false, leaseID); err != nil {
continue
}
return nil
}
return err
}
func (lc *leaseChecker) checkLease(ctx context.Context, expired bool, leaseID int64) error {
keysExpired, err := lc.hasKeysAttachedToLeaseExpired(ctx, leaseID)
if err != nil {
plog.Errorf("hasKeysAttachedToLeaseExpired error %v (endpoint %q)", err, lc.endpoint)
return err
}
leaseExpired, err := lc.hasLeaseExpired(ctx, leaseID)
if err != nil {
plog.Errorf("hasLeaseExpired error %v (endpoint %q)", err, lc.endpoint)
return err
}
if leaseExpired != keysExpired {
return fmt.Errorf("lease %v expiration mismatch (lease expired=%v, keys expired=%v)", leaseID, leaseExpired, keysExpired)
}
if leaseExpired != expired {
return fmt.Errorf("lease %v expected expired=%v, got %v", leaseID, expired, leaseExpired)
}
return nil
}
func (lc *leaseChecker) check(expired bool, leases map[int64]time.Time) error {
ctx, cancel := context.WithTimeout(context.Background(), leaseCheckerTimeout)
defer cancel()
for leaseID := range leases {
if err := lc.checkLease(ctx, expired, leaseID); err != nil {
return err
}
}
return nil
}
func (lc *leaseChecker) getLeaseByID(ctx context.Context, leaseID int64) (*pb.LeaseTimeToLiveResponse, error) {
ltl := &pb.LeaseTimeToLiveRequest{ID: leaseID, Keys: true}
return lc.leaseClient.LeaseTimeToLive(ctx, ltl, grpc.FailFast(false))
}
func (lc *leaseChecker) hasLeaseExpired(ctx context.Context, leaseID int64) (bool, error) {
// keep retrying until lease's state is known or ctx is being canceled
for ctx.Err() == nil {
resp, err := lc.getLeaseByID(ctx, leaseID)
if err != nil {
// for ~v3.1 compatibility
if rpctypes.Error(err) == rpctypes.ErrLeaseNotFound {
return true, nil
}
} else {
return resp.TTL == -1, nil
}
plog.Warningf("hasLeaseExpired %v resp %v error %v (endpoint %q)", leaseID, resp, err, lc.endpoint)
}
return false, ctx.Err()
}
// The keys attached to a lease have the format "<leaseID>_<idx>", where idx is the order of key creation.
// Since the key format embeds the leaseID, a range over the "<leaseID>" prefix
// determines whether the keys attached to a given leaseID have been deleted.
func (lc *leaseChecker) hasKeysAttachedToLeaseExpired(ctx context.Context, leaseID int64) (bool, error) {
resp, err := lc.kvc.Range(ctx, &pb.RangeRequest{
Key: []byte(fmt.Sprintf("%d", leaseID)),
RangeEnd: []byte(clientv3.GetPrefixRangeEnd(fmt.Sprintf("%d", leaseID))),
}, grpc.FailFast(false))
if err != nil {
plog.Errorf("retrieving keys attached to lease %v error %v (endpoint %q)", leaseID, err, lc.endpoint)
return false, err
}
return len(resp.Kvs) == 0, nil
}
// compositeChecker implements a checker that runs a slice of Checkers concurrently.
type compositeChecker struct{ checkers []Checker }
func newCompositeChecker(checkers []Checker) Checker {
return &compositeChecker{checkers}
}
func (cchecker *compositeChecker) Check() error {
errc := make(chan error)
for _, c := range cchecker.checkers {
go func(chk Checker) { errc <- chk.Check() }(c)
}
var errs []error
for range cchecker.checkers {
if err := <-errc; err != nil {
errs = append(errs, err)
}
}
return errsToError(errs)
}
type runnerChecker struct {
errc chan error
}
func (rc *runnerChecker) Check() error {
select {
case err := <-rc.errc:
return err
default:
return nil
}
}
type noChecker struct{}
func newNoChecker() Checker { return &noChecker{} }
func (nc *noChecker) Check() error { return nil }

View File

@ -1,261 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"context"
"fmt"
"math/rand"
"net"
"strings"
"time"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
"google.golang.org/grpc"
)
// agentConfig holds the information needed to configure an agent and interact with its etcd process
type agentConfig struct {
endpoint string
clientPort int
advertiseClientPort int
peerPort int
advertisePeerPort int
failpointPort int
}
type cluster struct {
agents []agentConfig
Size int
Members []*member
}
type ClusterStatus struct {
AgentStatuses map[string]client.Status
}
func (c *cluster) bootstrap() error {
size := len(c.agents)
members := make([]*member, size)
memberNameURLs := make([]string, size)
for i, a := range c.agents {
agent, err := client.NewAgent(a.endpoint)
if err != nil {
return err
}
host, _, err := net.SplitHostPort(a.endpoint)
if err != nil {
return err
}
members[i] = &member{
Agent: agent,
Endpoint: a.endpoint,
Name: fmt.Sprintf("etcd-%d", i),
ClientURL: fmt.Sprintf("http://%s:%d", host, a.clientPort),
AdvertiseClientURL: fmt.Sprintf("http://%s:%d", host, a.advertiseClientPort),
PeerURL: fmt.Sprintf("http://%s:%d", host, a.peerPort),
AdvertisePeerURL: fmt.Sprintf("http://%s:%d", host, a.advertisePeerPort),
FailpointURL: fmt.Sprintf("http://%s:%d", host, a.failpointPort),
}
memberNameURLs[i] = members[i].ClusterEntry()
}
clusterStr := strings.Join(memberNameURLs, ",")
token := fmt.Sprint(rand.Int())
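// every member shares the same cluster token and initial-cluster list so they bootstrap one cluster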
for i, m := range members {
flags := append(
m.Flags(),
"--initial-cluster-token", token,
"--initial-cluster", clusterStr,
"--snapshot-count", "10000")
if _, err := m.Agent.Start(flags...); err != nil {
// cleanup
for _, m := range members[:i] {
m.Agent.Terminate()
}
return err
}
}
c.Size = size
c.Members = members
return nil
}
func (c *cluster) Reset() error { return c.bootstrap() }
func (c *cluster) WaitHealth() error {
var err error
// wait 60s to check cluster health.
// TODO: set it to a reasonable value. It is set that high because a
// follower may take a long time to catch up with the leader when
// rebooted under a reasonable workload (https://github.com/coreos/etcd/issues/2698)
for i := 0; i < 60; i++ {
for _, m := range c.Members {
if err = m.SetHealthKeyV3(); err != nil {
break
}
}
if err == nil {
return nil
}
plog.Warningf("#%d setHealthKey error (%v)", i, err)
time.Sleep(time.Second)
}
return err
}
// GetLeader returns the index of the leader and an error, if any.
func (c *cluster) GetLeader() (int, error) {
for i, m := range c.Members {
isLeader, err := m.IsLeader()
if isLeader || err != nil {
return i, err
}
}
return 0, fmt.Errorf("no leader found")
}
func (c *cluster) Cleanup() error {
var lasterr error
for _, m := range c.Members {
if err := m.Agent.Cleanup(); err != nil {
lasterr = err
}
}
return lasterr
}
func (c *cluster) Terminate() {
for _, m := range c.Members {
m.Agent.Terminate()
}
}
func (c *cluster) Status() ClusterStatus {
cs := ClusterStatus{
AgentStatuses: make(map[string]client.Status),
}
for _, m := range c.Members {
s, err := m.Agent.Status()
// TODO: add a.Desc() as a key of the map
desc := m.Endpoint
if err != nil {
cs.AgentStatuses[desc] = client.Status{State: "unknown"}
plog.Printf("failed to get the status of agent [%s]", desc)
continue
}
cs.AgentStatuses[desc] = s
}
return cs
}
// maxRev returns the maximum revision found on the cluster.
func (c *cluster) maxRev() (rev int64, err error) {
ctx, cancel := context.WithTimeout(context.TODO(), time.Second)
defer cancel()
revc, errc := make(chan int64, len(c.Members)), make(chan error, len(c.Members))
for i := range c.Members {
go func(m *member) {
mrev, merr := m.Rev(ctx)
revc <- mrev
errc <- merr
}(c.Members[i])
}
for i := 0; i < len(c.Members); i++ {
if merr := <-errc; merr != nil {
err = merr
}
if mrev := <-revc; mrev > rev {
rev = mrev
}
}
return rev, err
}
func (c *cluster) getRevisionHash() (map[string]int64, map[string]int64, error) {
revs := make(map[string]int64)
hashes := make(map[string]int64)
for _, m := range c.Members {
rev, hash, err := m.RevHash()
if err != nil {
return nil, nil, err
}
revs[m.ClientURL] = rev
hashes[m.ClientURL] = hash
}
return revs, hashes, nil
}
func (c *cluster) compactKV(rev int64, timeout time.Duration) (err error) {
if rev <= 0 {
return nil
}
for i, m := range c.Members {
u := m.ClientURL
conn, derr := m.dialGRPC()
if derr != nil {
plog.Printf("[compact kv #%d] dial error %v (endpoint %s)", i, derr, u)
err = derr
continue
}
kvc := pb.NewKVClient(conn)
ctx, cancel := context.WithTimeout(context.Background(), timeout)
plog.Printf("[compact kv #%d] starting (endpoint %s)", i, u)
_, cerr := kvc.Compact(ctx, &pb.CompactionRequest{Revision: rev, Physical: true}, grpc.FailFast(false))
cancel()
conn.Close()
succeed := true
if cerr != nil {
if strings.Contains(cerr.Error(), "required revision has been compacted") && i > 0 {
plog.Printf("[compact kv #%d] already compacted (endpoint %s)", i, u)
} else {
plog.Warningf("[compact kv #%d] error %v (endpoint %s)", i, cerr, u)
err = cerr
succeed = false
}
}
if succeed {
plog.Printf("[compact kv #%d] done (endpoint %s)", i, u)
}
}
return err
}
func (c *cluster) checkCompact(rev int64) error {
if rev == 0 {
return nil
}
for _, m := range c.Members {
if err := m.CheckCompact(rev); err != nil {
return err
}
}
return nil
}
func (c *cluster) defrag() error {
for _, m := range c.Members {
if err := m.Defrag(); err != nil {
return err
}
}
return nil
}
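// A sketch of the expected cluster lifecycle, mirroring how main.go drives it
// (error handling elided):
//
//	c := &cluster{agents: agents}
//	if err := c.bootstrap(); err != nil {
//		plog.Fatal(err)
//	}
//	defer c.Terminate()
//	c.WaitHealth()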

View File

@ -1,16 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// etcd-tester is a single controller for all etcd-agents to manage an etcd cluster and simulate failures.
package main

View File

@ -1,97 +0,0 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"io/ioutil"
"os/exec"
"syscall"
"golang.org/x/time/rate"
)
type runnerStresser struct {
cmd *exec.Cmd
cmdStr string
args []string
rl *rate.Limiter
reqRate int
errc chan error
donec chan struct{}
}
func newRunnerStresser(cmdStr string, args []string, rl *rate.Limiter, reqRate int) *runnerStresser {
rl.SetLimit(rl.Limit() - rate.Limit(reqRate))
return &runnerStresser{
cmdStr: cmdStr,
args: args,
rl: rl,
reqRate: reqRate,
errc: make(chan error, 1),
donec: make(chan struct{}),
}
}
func (rs *runnerStresser) setupOnce() (err error) {
if rs.cmd != nil {
return nil
}
rs.cmd = exec.Command(rs.cmdStr, rs.args...)
stderr, err := rs.cmd.StderrPipe()
if err != nil {
return err
}
go func() {
defer close(rs.donec)
out, err := ioutil.ReadAll(stderr)
if err != nil {
rs.errc <- err
} else {
rs.errc <- fmt.Errorf("(%v %v) stderr %v", rs.cmdStr, rs.args, string(out))
}
}()
return rs.cmd.Start()
}
func (rs *runnerStresser) Stress() (err error) {
if err = rs.setupOnce(); err != nil {
return err
}
return syscall.Kill(rs.cmd.Process.Pid, syscall.SIGCONT)
}
func (rs *runnerStresser) Pause() {
syscall.Kill(rs.cmd.Process.Pid, syscall.SIGSTOP)
}
func (rs *runnerStresser) Close() {
syscall.Kill(rs.cmd.Process.Pid, syscall.SIGINT)
rs.cmd.Wait()
<-rs.donec
rs.rl.SetLimit(rs.rl.Limit() + rate.Limit(rs.reqRate))
}
func (rs *runnerStresser) ModifiedKeys() int64 {
return 1
}
func (rs *runnerStresser) Checker() Checker {
return &runnerChecker{rs.errc}
}
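// The stresser drives its child process with job-control signals: Stress
// sends SIGCONT to (re)start it, Pause sends SIGSTOP to freeze it, and Close
// sends SIGINT and waits for stderr to drain. A hypothetical round-trip:
//
//	rs := newRunnerStresser("/path/to/etcd-runner", args, limiter, 100)
//	rs.Stress() // starts the process on the first call
//	rs.Pause()  // freeze between rounds
//	rs.Stress() // resume
//	rs.Close()  // interrupt, reap, and return the borrowed rate-limit budget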

View File

@ -1,160 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"io/ioutil"
"net/http"
"strings"
"sync"
"time"
)
type failpointStats struct {
// crashes counts the number of crashes for a failpoint
crashes map[string]int
// mu protects crashes
mu sync.Mutex
}
var fpStats failpointStats
func failpointFailures(c *cluster, failpoints []string) (ret []failure, err error) {
var fps []string
fps, err = failpointPaths(c.Members[0].FailpointURL)
if err != nil {
return nil, err
}
// create failure objects for all failpoints
for _, fp := range fps {
if len(fp) == 0 {
continue
}
fpFails := failuresFromFailpoint(fp, failpoints)
// wrap in delays so the failpoint has time to trigger
for i, fpf := range fpFails {
if strings.Contains(fp, "Snap") {
// hack to trigger snapshot failpoints
fpFails[i] = &failureUntilSnapshot{fpf}
} else {
fpFails[i] = &failureDelay{fpf, 3 * time.Second}
}
}
ret = append(ret, fpFails...)
}
fpStats.crashes = make(map[string]int)
return ret, err
}
func failpointPaths(endpoint string) ([]string, error) {
resp, err := http.Get(endpoint)
if err != nil {
return nil, err
}
defer resp.Body.Close()
body, rerr := ioutil.ReadAll(resp.Body)
if rerr != nil {
return nil, rerr
}
var fps []string
for _, l := range strings.Split(string(body), "\n") {
fp := strings.Split(l, "=")[0]
fps = append(fps, fp)
}
return fps, nil
}
// failpoints follow the FreeBSD KFAIL_POINT syntax,
// e.g. panic("etcd-tester"),1*sleep(1000)->panic("etcd-tester")
func failuresFromFailpoint(fp string, failpoints []string) (fs []failure) {
recov := makeRecoverFailpoint(fp)
for _, failpoint := range failpoints {
inject := makeInjectFailpoint(fp, failpoint)
fs = append(fs, []failure{
&failureOne{
description: description(fmt.Sprintf("failpoint %s (one: %s)", fp, failpoint)),
injectMember: inject,
recoverMember: recov,
},
&failureAll{
description: description(fmt.Sprintf("failpoint %s (all: %s)", fp, failpoint)),
injectMember: inject,
recoverMember: recov,
},
&failureMajority{
description: description(fmt.Sprintf("failpoint %s (majority: %s)", fp, failpoint)),
injectMember: inject,
recoverMember: recov,
},
&failureLeader{
failureByFunc{
description: description(fmt.Sprintf("failpoint %s (leader: %s)", fp, failpoint)),
injectMember: inject,
recoverMember: recov,
},
0,
},
}...)
}
return fs
}
func makeInjectFailpoint(fp, val string) injectMemberFunc {
return func(m *member) (err error) {
return putFailpoint(m.FailpointURL, fp, val)
}
}
func makeRecoverFailpoint(fp string) recoverMemberFunc {
return func(m *member) error {
if err := delFailpoint(m.FailpointURL, fp); err == nil {
return nil
}
// node not responding, likely dead from fp panic; restart
fpStats.mu.Lock()
fpStats.crashes[fp]++
fpStats.mu.Unlock()
return recoverStop(m)
}
}
func putFailpoint(ep, fp, val string) error {
req, _ := http.NewRequest(http.MethodPut, ep+"/"+fp, strings.NewReader(val))
c := http.Client{}
resp, err := c.Do(req)
if err != nil {
return err
}
resp.Body.Close()
if resp.StatusCode/100 != 2 {
return fmt.Errorf("failed to PUT %s=%s at %s (%v)", fp, val, ep, resp.Status)
}
return nil
}
func delFailpoint(ep, fp string) error {
req, _ := http.NewRequest(http.MethodDelete, ep+"/"+fp, strings.NewReader(""))
c := http.Client{}
resp, err := c.Do(req)
if err != nil {
return err
}
resp.Body.Close()
if resp.StatusCode/100 != 2 {
return fmt.Errorf("failed to DELETE %s at %s (%v)", fp, ep, resp.Status)
}
return nil
}
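// The failpoint endpoint speaks plain HTTP, so the same operations can be
// driven by hand; for a hypothetical member exposing failpoints on :2381:
//
//	PUT    http://127.0.0.1:2381/<failpoint>   body: panic("etcd-tester")
//	DELETE http://127.0.0.1:2381/<failpoint>
//
// putFailpoint and delFailpoint above issue exactly these requests and treat
// any non-2xx status as an error.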

View File

@ -1,205 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"math/rand"
"os/exec"
"time"
)
type failure interface {
// Inject injects the failure into the testing cluster at the given
// round. When the function is called, the cluster should be healthy.
Inject(c *cluster, round int) error
// Recover recovers from the failure injected at the given round and
// waits for the testing cluster to become healthy again.
Recover(c *cluster, round int) error
// Desc returns a description of the failure
Desc() string
}
type description string
func (d description) Desc() string { return string(d) }
type injectMemberFunc func(*member) error
type recoverMemberFunc func(*member) error
type failureByFunc struct {
description
injectMember injectMemberFunc
recoverMember recoverMemberFunc
}
type failureOne failureByFunc
type failureAll failureByFunc
type failureMajority failureByFunc
type failureLeader struct {
failureByFunc
idx int
}
type failureDelay struct {
failure
delayDuration time.Duration
}
// failureUntilSnapshot injects a failure and waits for a snapshot event
type failureUntilSnapshot struct{ failure }
func (f *failureOne) Inject(c *cluster, round int) error {
return f.injectMember(c.Members[round%c.Size])
}
func (f *failureOne) Recover(c *cluster, round int) error {
if err := f.recoverMember(c.Members[round%c.Size]); err != nil {
return err
}
return c.WaitHealth()
}
func (f *failureAll) Inject(c *cluster, round int) error {
for _, m := range c.Members {
if err := f.injectMember(m); err != nil {
return err
}
}
return nil
}
func (f *failureAll) Recover(c *cluster, round int) error {
for _, m := range c.Members {
if err := f.recoverMember(m); err != nil {
return err
}
}
return c.WaitHealth()
}
func (f *failureMajority) Inject(c *cluster, round int) error {
for i := range killMap(c.Size, round) {
if err := f.injectMember(c.Members[i]); err != nil {
return err
}
}
return nil
}
func (f *failureMajority) Recover(c *cluster, round int) error {
for i := range killMap(c.Size, round) {
if err := f.recoverMember(c.Members[i]); err != nil {
return err
}
}
return nil
}
func (f *failureLeader) Inject(c *cluster, round int) error {
idx, err := c.GetLeader()
if err != nil {
return err
}
f.idx = idx
return f.injectMember(c.Members[idx])
}
func (f *failureLeader) Recover(c *cluster, round int) error {
if err := f.recoverMember(c.Members[f.idx]); err != nil {
return err
}
return c.WaitHealth()
}
func (f *failureDelay) Inject(c *cluster, round int) error {
if err := f.failure.Inject(c, round); err != nil {
return err
}
if f.delayDuration > 0 {
plog.Infof("sleeping delay duration %v for %q", f.delayDuration, f.failure.Desc())
time.Sleep(f.delayDuration)
}
return nil
}
func (f *failureUntilSnapshot) Inject(c *cluster, round int) error {
if err := f.failure.Inject(c, round); err != nil {
return err
}
if c.Size < 3 {
return nil
}
// maxRev may fail right after the failure is injected; retry on failure.
startRev, err := c.maxRev()
for i := 0; i < 10 && startRev == 0; i++ {
startRev, err = c.maxRev()
}
if startRev == 0 {
return err
}
lastRev := startRev
// A normal healthy cluster can accept at least 1000 req/s.
// Give it three times that long to create a new snapshot.
retry := snapshotCount / 1000 * 3
for j := 0; j < retry; j++ {
lastRev, _ = c.maxRev()
// If the number of proposals committed is bigger than snapshot count,
// a new snapshot should have been created.
if lastRev-startRev > snapshotCount {
return nil
}
time.Sleep(time.Second)
}
return fmt.Errorf("cluster too slow: only commit %d requests in %ds", lastRev-startRev, retry)
}
func (f *failureUntilSnapshot) Desc() string {
return f.failure.Desc() + " for a long time and expect it to recover from an incoming snapshot"
}
func killMap(size int, seed int) map[int]bool {
m := make(map[int]bool)
r := rand.New(rand.NewSource(int64(seed)))
majority := size/2 + 1
for {
m[r.Intn(size)] = true
if len(m) >= majority {
return m
}
}
}
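// Because killMap seeds math/rand with the round number, the same (size, seed)
// pair always yields the same member set. Inject and Recover in
// failureMajority can therefore each call killMap(c.Size, round) independently
// and still agree on exactly which members were killed.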
type failureNop failureByFunc
func (f *failureNop) Inject(c *cluster, round int) error { return nil }
func (f *failureNop) Recover(c *cluster, round int) error { return nil }
type failureExternal struct {
failure
description string
scriptPath string
}
func (f *failureExternal) Inject(c *cluster, round int) error {
return exec.Command(f.scriptPath, "enable", fmt.Sprintf("%d", round)).Run()
}
func (f *failureExternal) Recover(c *cluster, round int) error {
return exec.Command(f.scriptPath, "disable", fmt.Sprintf("%d", round)).Run()
}
func (f *failureExternal) Desc() string { return f.description }
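// As implemented above, an external fault injector is any executable honoring
// two subcommands, e.g. for a hypothetical script fault.sh:
//
//	fault.sh enable 3    # inject the fault for round 3
//	fault.sh disable 3   # recover from it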

View File

@ -1,177 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"time"
)
const (
snapshotCount = 10000
slowNetworkLatency = 500 // in milliseconds
randomVariation = 50
// delay duration to trigger leader election (default election timeout 1s)
triggerElectionDur = 5 * time.Second
// Wait longer when recovering from a slow network, because the network layer
// needs extra time to propagate the traffic control (tc command) change.
// Otherwise, we get different hash values from the previous revision.
// For more detail, please see https://github.com/coreos/etcd/issues/5121.
waitRecover = 5 * time.Second
)
func injectStop(m *member) error { return m.Agent.Stop() }
func recoverStop(m *member) error {
_, err := m.Agent.Restart()
return err
}
func newFailureKillAll() failure {
return &failureAll{
description: "kill all members",
injectMember: injectStop,
recoverMember: recoverStop,
}
}
func newFailureKillMajority() failure {
return &failureMajority{
description: "kill majority of the cluster",
injectMember: injectStop,
recoverMember: recoverStop,
}
}
func newFailureKillOne() failure {
return &failureOne{
description: "kill one random member",
injectMember: injectStop,
recoverMember: recoverStop,
}
}
func newFailureKillLeader() failure {
ff := failureByFunc{
description: "kill leader member",
injectMember: injectStop,
recoverMember: recoverStop,
}
return &failureLeader{ff, 0}
}
func newFailureKillOneForLongTime() failure {
return &failureUntilSnapshot{newFailureKillOne()}
}
func newFailureKillLeaderForLongTime() failure {
return &failureUntilSnapshot{newFailureKillLeader()}
}
func injectDropPort(m *member) error { return m.Agent.DropPort(m.peerPort()) }
func recoverDropPort(m *member) error { return m.Agent.RecoverPort(m.peerPort()) }
func newFailureIsolate() failure {
f := &failureOne{
description: "isolate one member",
injectMember: injectDropPort,
recoverMember: recoverDropPort,
}
return &failureDelay{
failure: f,
delayDuration: triggerElectionDur,
}
}
func newFailureIsolateAll() failure {
f := &failureAll{
description: "isolate all members",
injectMember: injectDropPort,
recoverMember: recoverDropPort,
}
return &failureDelay{
failure: f,
delayDuration: triggerElectionDur,
}
}
func injectLatency(m *member) error {
if err := m.Agent.SetLatency(slowNetworkLatency, randomVariation); err != nil {
m.Agent.RemoveLatency()
return err
}
return nil
}
func recoverLatency(m *member) error {
if err := m.Agent.RemoveLatency(); err != nil {
return err
}
time.Sleep(waitRecover)
return nil
}
func newFailureSlowNetworkOneMember() failure {
desc := fmt.Sprintf("slow down one member's network by adding %d ms latency", slowNetworkLatency)
f := &failureOne{
description: description(desc),
injectMember: injectLatency,
recoverMember: recoverLatency,
}
return &failureDelay{
failure: f,
delayDuration: triggerElectionDur,
}
}
func newFailureSlowNetworkLeader() failure {
desc := fmt.Sprintf("slow down leader's network by adding %d ms latency", slowNetworkLatency)
ff := failureByFunc{
description: description(desc),
injectMember: injectLatency,
recoverMember: recoverLatency,
}
f := &failureLeader{ff, 0}
return &failureDelay{
failure: f,
delayDuration: triggerElectionDur,
}
}
func newFailureSlowNetworkAll() failure {
f := &failureAll{
description: "slow down all members' network",
injectMember: injectLatency,
recoverMember: recoverLatency,
}
return &failureDelay{
failure: f,
delayDuration: triggerElectionDur,
}
}
func newFailureNop() failure {
return &failureNop{
description: "no failure",
}
}
func newFailureExternal(scriptPath string) failure {
return &failureExternal{
description: fmt.Sprintf("external fault injector (script: %s)", scriptPath),
scriptPath: scriptPath,
}
}

View File

@ -1,44 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"encoding/json"
"net/http"
)
type statusHandler struct {
status *Status
}
func (sh statusHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
en := json.NewEncoder(w)
sh.status.mu.Lock()
defer sh.status.mu.Unlock()
if err := en.Encode(Status{
Since: sh.status.Since,
Failures: sh.status.Failures,
RoundLimit: sh.status.RoundLimit,
Cluster: sh.status.cluster.Status(),
cluster: sh.status.cluster,
Round: sh.status.Round,
Case: sh.status.Case,
}); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
}
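// main.go mounts this handler at /status on port 9028, so a snapshot of the
// tester state can be fetched with, e.g.:
//
//	curl http://localhost:9028/status
//
// The mutex is held for the whole encode so that a concurrent round/case
// update cannot tear the reported snapshot.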

View File

@ -1,331 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"context"
"fmt"
"math/rand"
"sync"
"sync/atomic"
"time"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"golang.org/x/time/rate"
"google.golang.org/grpc"
"google.golang.org/grpc/transport"
)
type keyStresser struct {
Endpoint string
keyLargeSize int
keySize int
keySuffixRange int
keyTxnSuffixRange int
keyTxnOps int
N int
rateLimiter *rate.Limiter
wg sync.WaitGroup
cancel func()
conn *grpc.ClientConn
// atomicModifiedKeys records the number of keys created and deleted by the stresser.
atomicModifiedKeys int64
stressTable *stressTable
}
func (s *keyStresser) Stress() error {
// TODO: add backoff option
conn, err := grpc.Dial(s.Endpoint, grpc.WithInsecure())
if err != nil {
return fmt.Errorf("%v (%s)", err, s.Endpoint)
}
ctx, cancel := context.WithCancel(context.Background())
s.wg.Add(s.N)
s.conn = conn
s.cancel = cancel
kvc := pb.NewKVClient(conn)
var stressEntries = []stressEntry{
{weight: 0.7, f: newStressPut(kvc, s.keySuffixRange, s.keySize)},
{
weight: 0.7 * float32(s.keySize) / float32(s.keyLargeSize),
f: newStressPut(kvc, s.keySuffixRange, s.keyLargeSize),
},
{weight: 0.07, f: newStressRange(kvc, s.keySuffixRange)},
{weight: 0.07, f: newStressRangeInterval(kvc, s.keySuffixRange)},
{weight: 0.07, f: newStressDelete(kvc, s.keySuffixRange)},
{weight: 0.07, f: newStressDeleteInterval(kvc, s.keySuffixRange)},
}
if s.keyTxnSuffixRange > 0 {
// adjust so that writes still make up roughly 70% of the workload
stressEntries[0].weight = 0.35
stressEntries = append(stressEntries, stressEntry{
weight: 0.35,
f: newStressTxn(kvc, s.keyTxnSuffixRange, s.keyTxnOps),
})
}
s.stressTable = createStressTable(stressEntries)
for i := 0; i < s.N; i++ {
go s.run(ctx)
}
plog.Infof("keyStresser %q is started", s.Endpoint)
return nil
}
func (s *keyStresser) run(ctx context.Context) {
defer s.wg.Done()
for {
if err := s.rateLimiter.Wait(ctx); err == context.Canceled {
return
}
// TODO: a 10-second timeout is enough to cover leader failure
// and an immediate leader election. Find out in what other cases
// this could time out.
sctx, scancel := context.WithTimeout(ctx, 10*time.Second)
err, modifiedKeys := s.stressTable.choose()(sctx)
scancel()
if err == nil {
atomic.AddInt64(&s.atomicModifiedKeys, modifiedKeys)
continue
}
switch rpctypes.ErrorDesc(err) {
case context.DeadlineExceeded.Error():
// This retries when a request is issued at the same time as a
// leader failure. When we terminate the leader, requests to
// that leader cannot be processed and time out. Requests to
// followers cannot be forwarded to the old leader either, so
// they time out as well. We want to keep stressing until the
// cluster elects a new leader and starts processing requests again.
case etcdserver.ErrTimeoutDueToLeaderFail.Error(), etcdserver.ErrTimeout.Error():
// This retries when a request is issued at the same time as a
// leader failure and follower nodes receive timeout errors
// after losing their leader. Followers should retry and connect
// to the new leader.
case etcdserver.ErrStopped.Error():
// one of the etcd nodes stopped from failure injection
case transport.ErrConnClosing.Desc:
// server closed the transport (failure injected node)
case rpctypes.ErrNotCapable.Error():
// capability check has not been done (in the beginning)
case rpctypes.ErrTooManyRequests.Error():
// hitting the recovering member.
case context.Canceled.Error():
// context canceled by the stresser's Close method:
return
case grpc.ErrClientConnClosing.Error():
// client connection closed by the stresser's Close method:
return
default:
plog.Errorf("keyStresser %v exited with error (%v)", s.Endpoint, err)
return
}
}
}
func (s *keyStresser) Pause() {
s.Close()
}
func (s *keyStresser) Close() {
s.cancel()
s.conn.Close()
s.wg.Wait()
plog.Infof("keyStresser %q is closed", s.Endpoint)
}
func (s *keyStresser) ModifiedKeys() int64 {
return atomic.LoadInt64(&s.atomicModifiedKeys)
}
func (s *keyStresser) Checker() Checker { return nil }
type stressFunc func(ctx context.Context) (err error, modifiedKeys int64)
type stressEntry struct {
weight float32
f stressFunc
}
type stressTable struct {
entries []stressEntry
sumWeights float32
}
func createStressTable(entries []stressEntry) *stressTable {
st := stressTable{entries: entries}
for _, entry := range st.entries {
st.sumWeights += entry.weight
}
return &st
}
func (st *stressTable) choose() stressFunc {
v := rand.Float32() * st.sumWeights
var sum float32
var idx int
for i := range st.entries {
sum += st.entries[i].weight
if sum >= v {
idx = i
break
}
}
return st.entries[idx].f
}
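// choose implements weighted sampling: entry i is returned with probability
// weight_i / sumWeights. With the default table above (weights summing to
// roughly 1.0), puts are drawn about 70% of the time and each range/delete
// variant about 7%.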
func newStressPut(kvc pb.KVClient, keySuffixRange, keySize int) stressFunc {
return func(ctx context.Context) (error, int64) {
_, err := kvc.Put(ctx, &pb.PutRequest{
Key: []byte(fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange))),
Value: randBytes(keySize),
}, grpc.FailFast(false))
return err, 1
}
}
func newStressTxn(kvc pb.KVClient, keyTxnSuffixRange, txnOps int) stressFunc {
keys := make([]string, keyTxnSuffixRange)
for i := range keys {
keys[i] = fmt.Sprintf("/k%03d", i)
}
return writeTxn(kvc, keys, txnOps)
}
func writeTxn(kvc pb.KVClient, keys []string, txnOps int) stressFunc {
return func(ctx context.Context) (error, int64) {
ks := make(map[string]struct{}, txnOps)
for len(ks) != txnOps {
ks[keys[rand.Intn(len(keys))]] = struct{}{}
}
selected := make([]string, 0, txnOps)
for k := range ks {
selected = append(selected, k)
}
com, delOp, putOp := getTxnReqs(selected[0], "bar00")
txnReq := &pb.TxnRequest{
Compare: []*pb.Compare{com},
Success: []*pb.RequestOp{delOp},
Failure: []*pb.RequestOp{putOp},
}
// add nested txns if any
for i := 1; i < txnOps; i++ {
k, v := selected[i], fmt.Sprintf("bar%02d", i)
com, delOp, putOp = getTxnReqs(k, v)
nested := &pb.RequestOp{
Request: &pb.RequestOp_RequestTxn{
RequestTxn: &pb.TxnRequest{
Compare: []*pb.Compare{com},
Success: []*pb.RequestOp{delOp},
Failure: []*pb.RequestOp{putOp},
},
},
}
txnReq.Success = append(txnReq.Success, nested)
txnReq.Failure = append(txnReq.Failure, nested)
}
_, err := kvc.Txn(ctx, txnReq, grpc.FailFast(false))
return err, int64(txnOps)
}
}
func getTxnReqs(key, val string) (com *pb.Compare, delOp *pb.RequestOp, putOp *pb.RequestOp) {
// if key exists (version > 0)
com = &pb.Compare{
Key: []byte(key),
Target: pb.Compare_VERSION,
Result: pb.Compare_GREATER,
TargetUnion: &pb.Compare_Version{Version: 0},
}
delOp = &pb.RequestOp{
Request: &pb.RequestOp_RequestDeleteRange{
RequestDeleteRange: &pb.DeleteRangeRequest{
Key: []byte(key),
},
},
}
putOp = &pb.RequestOp{
Request: &pb.RequestOp_RequestPut{
RequestPut: &pb.PutRequest{
Key: []byte(key),
Value: []byte(val),
},
},
}
return com, delOp, putOp
}
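// For txnOps = 2 over keys k0 and k1, writeTxn above produces a request
// shaped like (pseudo-notation):
//
//	If   version(k0) > 0
//	Then Delete(k0), Txn{ If version(k1) > 0 Then Delete(k1) Else Put(k1) }
//	Else Put(k0),    Txn{ If version(k1) > 0 Then Delete(k1) Else Put(k1) }
//
// The same nested txn is attached to both branches, so every selected key is
// flipped between present and absent regardless of how the top-level compare
// resolves.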
func newStressRange(kvc pb.KVClient, keySuffixRange int) stressFunc {
return func(ctx context.Context) (error, int64) {
_, err := kvc.Range(ctx, &pb.RangeRequest{
Key: []byte(fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange))),
}, grpc.FailFast(false))
return err, 0
}
}
func newStressRangeInterval(kvc pb.KVClient, keySuffixRange int) stressFunc {
return func(ctx context.Context) (error, int64) {
start := rand.Intn(keySuffixRange)
end := start + 500
_, err := kvc.Range(ctx, &pb.RangeRequest{
Key: []byte(fmt.Sprintf("foo%016x", start)),
RangeEnd: []byte(fmt.Sprintf("foo%016x", end)),
}, grpc.FailFast(false))
return err, 0
}
}
func newStressDelete(kvc pb.KVClient, keySuffixRange int) stressFunc {
return func(ctx context.Context) (error, int64) {
_, err := kvc.DeleteRange(ctx, &pb.DeleteRangeRequest{
Key: []byte(fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange))),
}, grpc.FailFast(false))
return err, 1
}
}
func newStressDeleteInterval(kvc pb.KVClient, keySuffixRange int) stressFunc {
return func(ctx context.Context) (error, int64) {
start := rand.Intn(keySuffixRange)
end := start + 500
resp, err := kvc.DeleteRange(ctx, &pb.DeleteRangeRequest{
Key: []byte(fmt.Sprintf("foo%016x", start)),
RangeEnd: []byte(fmt.Sprintf("foo%016x", end)),
}, grpc.FailFast(false))
if err == nil {
return nil, resp.Deleted
}
return err, 0
}
}

View File

@ -1,382 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"context"
"fmt"
"math/rand"
"sync"
"sync/atomic"
"time"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"golang.org/x/time/rate"
"google.golang.org/grpc"
)
const (
// time to live for lease
TTL = 120
TTLShort = 2
)
type leaseStresser struct {
endpoint string
cancel func()
conn *grpc.ClientConn
kvc pb.KVClient
lc pb.LeaseClient
ctx context.Context
rateLimiter *rate.Limiter
// atomicModifiedKey records the number of keys created and deleted during a test case
atomicModifiedKey int64
numLeases int
keysPerLease int
aliveLeases *atomicLeases
revokedLeases *atomicLeases
shortLivedLeases *atomicLeases
runWg sync.WaitGroup
aliveWg sync.WaitGroup
}
type atomicLeases struct {
// rwLock protects read/write access to the leases map,
// which is accessed and modified by different goroutines.
rwLock sync.RWMutex
leases map[int64]time.Time
}
func (al *atomicLeases) add(leaseID int64, t time.Time) {
al.rwLock.Lock()
al.leases[leaseID] = t
al.rwLock.Unlock()
}
func (al *atomicLeases) update(leaseID int64, t time.Time) {
al.rwLock.Lock()
_, ok := al.leases[leaseID]
if ok {
al.leases[leaseID] = t
}
al.rwLock.Unlock()
}
func (al *atomicLeases) read(leaseID int64) (rv time.Time, ok bool) {
al.rwLock.RLock()
rv, ok = al.leases[leaseID]
al.rwLock.RUnlock()
return rv, ok
}
func (al *atomicLeases) remove(leaseID int64) {
al.rwLock.Lock()
delete(al.leases, leaseID)
al.rwLock.Unlock()
}
func (al *atomicLeases) getLeasesMap() map[int64]time.Time {
leasesCopy := make(map[int64]time.Time)
al.rwLock.RLock()
for k, v := range al.leases {
leasesCopy[k] = v
}
al.rwLock.RUnlock()
return leasesCopy
}
func (ls *leaseStresser) setupOnce() error {
if ls.aliveLeases != nil {
return nil
}
if ls.numLeases == 0 {
panic("expect numLeases to be set")
}
if ls.keysPerLease == 0 {
panic("expect keysPerLease to be set")
}
ls.aliveLeases = &atomicLeases{leases: make(map[int64]time.Time)}
return nil
}
func (ls *leaseStresser) Stress() error {
plog.Infof("lease Stresser %v starting ...", ls.endpoint)
if err := ls.setupOnce(); err != nil {
return err
}
conn, err := grpc.Dial(ls.endpoint, grpc.WithInsecure(), grpc.WithBackoffMaxDelay(1*time.Second))
if err != nil {
return fmt.Errorf("%v (%s)", err, ls.endpoint)
}
ls.conn = conn
ls.kvc = pb.NewKVClient(conn)
ls.lc = pb.NewLeaseClient(conn)
ls.revokedLeases = &atomicLeases{leases: make(map[int64]time.Time)}
ls.shortLivedLeases = &atomicLeases{leases: make(map[int64]time.Time)}
ctx, cancel := context.WithCancel(context.Background())
ls.cancel = cancel
ls.ctx = ctx
ls.runWg.Add(1)
go ls.run()
return nil
}
func (ls *leaseStresser) run() {
defer ls.runWg.Done()
ls.restartKeepAlives()
for {
// the number of keys created and deleted is roughly 2x the number of created keys per iteration.
// the rateLimiter therefore consumes 2*ls.numLeases*ls.keysPerLease tokens, where each token represents one create/delete operation on a key.
err := ls.rateLimiter.WaitN(ls.ctx, 2*ls.numLeases*ls.keysPerLease)
if err == context.Canceled {
return
}
plog.Debugf("creating lease on %v", ls.endpoint)
ls.createLeases()
plog.Debugf("done creating lease on %v", ls.endpoint)
plog.Debugf("dropping lease on %v", ls.endpoint)
ls.randomlyDropLeases()
plog.Debugf("done dropping lease on %v", ls.endpoint)
}
}
func (ls *leaseStresser) restartKeepAlives() {
for leaseID := range ls.aliveLeases.getLeasesMap() {
ls.aliveWg.Add(1)
go func(id int64) {
ls.keepLeaseAlive(id)
}(leaseID)
}
}
func (ls *leaseStresser) createLeases() {
ls.createAliveLeases()
ls.createShortLivedLeases()
}
func (ls *leaseStresser) createAliveLeases() {
neededLeases := ls.numLeases - len(ls.aliveLeases.getLeasesMap())
var wg sync.WaitGroup
for i := 0; i < neededLeases; i++ {
wg.Add(1)
go func() {
defer wg.Done()
leaseID, err := ls.createLeaseWithKeys(TTL)
if err != nil {
plog.Debugf("lease creation error: (%v)", err)
return
}
ls.aliveLeases.add(leaseID, time.Now())
// keep track of all the keep-lease-alive goroutines
ls.aliveWg.Add(1)
go ls.keepLeaseAlive(leaseID)
}()
}
wg.Wait()
}
func (ls *leaseStresser) createShortLivedLeases() {
// one round of createLeases() might not create all the short-lived leases we want due to failures.
// thus, we create the remaining short-lived leases in a future round.
neededLeases := ls.numLeases - len(ls.shortLivedLeases.getLeasesMap())
var wg sync.WaitGroup
for i := 0; i < neededLeases; i++ {
wg.Add(1)
go func() {
defer wg.Done()
leaseID, err := ls.createLeaseWithKeys(TTLShort)
if err != nil {
return
}
ls.shortLivedLeases.add(leaseID, time.Now())
}()
}
wg.Wait()
}
func (ls *leaseStresser) createLeaseWithKeys(ttl int64) (int64, error) {
leaseID, err := ls.createLease(ttl)
if err != nil {
plog.Debugf("lease creation error: (%v)", err)
return -1, err
}
plog.Debugf("lease %v created ", leaseID)
if err := ls.attachKeysWithLease(leaseID); err != nil {
return -1, err
}
return leaseID, nil
}
func (ls *leaseStresser) randomlyDropLeases() {
var wg sync.WaitGroup
for l := range ls.aliveLeases.getLeasesMap() {
wg.Add(1)
go func(leaseID int64) {
defer wg.Done()
dropped, err := ls.randomlyDropLease(leaseID)
// if randomlyDropLease encountered an error, such as a canceled context, remove the lease from aliveLeases
// because we can't tell whether the lease was dropped or not.
if err != nil {
plog.Debugf("drop lease %v has failed error (%v)", leaseID, err)
ls.aliveLeases.remove(leaseID)
return
}
if !dropped {
return
}
plog.Debugf("lease %v dropped", leaseID)
ls.revokedLeases.add(leaseID, time.Now())
ls.aliveLeases.remove(leaseID)
}(l)
}
wg.Wait()
}
func (ls *leaseStresser) createLease(ttl int64) (int64, error) {
resp, err := ls.lc.LeaseGrant(ls.ctx, &pb.LeaseGrantRequest{TTL: ttl})
if err != nil {
return -1, err
}
return resp.ID, nil
}
func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
defer ls.aliveWg.Done()
ctx, cancel := context.WithCancel(ls.ctx)
stream, err := ls.lc.LeaseKeepAlive(ctx)
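// cancel is reassigned whenever the stream is recreated below, so defer a
// closure that invokes the latest cancel rather than deferring the current
// value directly.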
defer func() { cancel() }()
for {
select {
case <-time.After(500 * time.Millisecond):
case <-ls.ctx.Done():
plog.Debugf("keepLeaseAlive lease %v context canceled ", leaseID)
// it is possible that a lease expires during the invariant-checking phase but not during the keepLeaseAlive() phase.
// this scenario arises when an alive lease is just about to expire as keepLeaseAlive() exits, and then expires during invariant checking.
// to avoid it, we check each lease before the keepalive loop exits to see whether it has been renewed within the last TTL/2.
// if it has, invariant checking has at least TTL/2 before the lease expires, which is long enough for the check to finish.
// if it has not, we remove the lease from the alive map so that it doesn't expire during invariant checking.
renewTime, ok := ls.aliveLeases.read(leaseID)
if ok && renewTime.Add(TTL/2*time.Second).Before(time.Now()) {
ls.aliveLeases.remove(leaseID)
plog.Debugf("keepLeaseAlive lease %v has not been renewed. drop it.", leaseID)
}
return
}
if err != nil {
plog.Debugf("keepLeaseAlive lease %v creates stream error: (%v)", leaseID, err)
cancel()
// recreate the stream with a fresh context and keep that context alive;
// canceling the new context here would kill the stream immediately
ctx, cancel = context.WithCancel(ls.ctx)
stream, err = ls.lc.LeaseKeepAlive(ctx)
continue
}
}
err = stream.Send(&pb.LeaseKeepAliveRequest{ID: leaseID})
plog.Debugf("keepLeaseAlive stream sends lease %v keepalive request", leaseID)
if err != nil {
plog.Debugf("keepLeaseAlive stream sends lease %v error (%v)", leaseID, err)
continue
}
leaseRenewTime := time.Now()
plog.Debugf("keepLeaseAlive stream sends lease %v keepalive request succeed", leaseID)
respRC, err := stream.Recv()
if err != nil {
plog.Debugf("keepLeaseAlive stream receives lease %v stream error (%v)", leaseID, err)
continue
}
// the lease has expired once its TTL becomes 0;
// don't send a keepalive if the lease has expired
if respRC.TTL <= 0 {
plog.Debugf("keepLeaseAlive stream receives lease %v has TTL <= 0", leaseID)
ls.aliveLeases.remove(leaseID)
return
}
// renew lease timestamp only if lease is present
plog.Debugf("keepLeaseAlive renew lease %v", leaseID)
ls.aliveLeases.update(leaseID, leaseRenewTime)
}
}
// attachKeysWithLease attaches keys to the lease.
// each key is the concatenation of leaseID + '_' + '<order of key creation>',
// e.g. 5186835655248304152_0 for the first created key and 5186835655248304152_1 for the second.
func (ls *leaseStresser) attachKeysWithLease(leaseID int64) error {
var txnPuts []*pb.RequestOp
for j := 0; j < ls.keysPerLease; j++ {
txnput := &pb.RequestOp{Request: &pb.RequestOp_RequestPut{RequestPut: &pb.PutRequest{Key: []byte(fmt.Sprintf("%d%s%d", leaseID, "_", j)),
Value: []byte("bar"), Lease: leaseID}}}
txnPuts = append(txnPuts, txnput)
}
// keep retrying until the lease is not found or the context is canceled
for ls.ctx.Err() == nil {
txn := &pb.TxnRequest{Success: txnPuts}
_, err := ls.kvc.Txn(ls.ctx, txn)
if err == nil {
// since all created keys will be deleted too, the number of operations on keys will be roughly 2x the number of created keys
atomic.AddInt64(&ls.atomicModifiedKey, 2*int64(ls.keysPerLease))
return nil
}
if rpctypes.Error(err) == rpctypes.ErrLeaseNotFound {
return err
}
}
return ls.ctx.Err()
}
// randomlyDropLease drops the lease only when rand.Intn(2) returns a non-zero value,
// giving each lease a 50% chance of being dropped.
func (ls *leaseStresser) randomlyDropLease(leaseID int64) (bool, error) {
if rand.Intn(2) != 0 {
return false, nil
}
// keep retrying until the lease is dropped or the context is canceled
for ls.ctx.Err() == nil {
_, err := ls.lc.LeaseRevoke(ls.ctx, &pb.LeaseRevokeRequest{ID: leaseID})
if err == nil || rpctypes.Error(err) == rpctypes.ErrLeaseNotFound {
return true, nil
}
}
plog.Debugf("randomlyDropLease error: (%v)", ls.ctx.Err())
return false, ls.ctx.Err()
}
func (ls *leaseStresser) Pause() {
ls.Close()
}
func (ls *leaseStresser) Close() {
plog.Debugf("lease stresser %q is closing...", ls.endpoint)
ls.cancel()
ls.runWg.Wait()
ls.aliveWg.Wait()
ls.conn.Close()
plog.Infof("lease stresser %q is closed", ls.endpoint)
}
func (ls *leaseStresser) ModifiedKeys() int64 {
return atomic.LoadInt64(&ls.atomicModifiedKey)
}
func (ls *leaseStresser) Checker() Checker { return &leaseChecker{endpoint: ls.endpoint, ls: ls} }

View File

@ -1,232 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"flag"
"fmt"
"io/ioutil"
"net/http"
"os"
"strings"
"github.com/coreos/etcd/pkg/debugutil"
"github.com/coreos/pkg/capnslog"
"github.com/prometheus/client_golang/prometheus/promhttp"
"golang.org/x/time/rate"
"google.golang.org/grpc/grpclog"
)
var plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "etcd-tester")
const (
defaultClientPort = 2379
defaultPeerPort = 2380
defaultFailpointPort = 2381
)
func main() {
endpointStr := flag.String("agent-endpoints", "localhost:9027", "HTTP RPC endpoints of agents. Do not specify the scheme.")
clientPorts := flag.String("client-ports", "", "etcd client port for each agent endpoint")
advertiseClientPorts := flag.String("advertise-client-ports", "", "etcd advertise client port for each agent endpoint")
peerPorts := flag.String("peer-ports", "", "etcd peer port for each agent endpoint")
advertisePeerPorts := flag.String("advertise-peer-ports", "", "etcd advertise peer port for each agent endpoint")
failpointPorts := flag.String("failpoint-ports", "", "etcd failpoint port for each agent endpoint")
stressKeyLargeSize := flag.Uint("stress-key-large-size", 32*1024+1, "the size of each large key written into etcd.")
stressKeySize := flag.Uint("stress-key-size", 100, "the size of each small key written into etcd.")
stressKeySuffixRange := flag.Uint("stress-key-count", 250000, "the size of the key range written into etcd.")
stressKeyTxnSuffixRange := flag.Uint("stress-key-txn-count", 100, "the size of the key range written by etcd txns (max 100).")
stressKeyTxnOps := flag.Uint("stress-key-txn-ops", 1, "number of operations per transaction (max 64).")
limit := flag.Int("limit", -1, "the limit of rounds to run failure set (-1 to run without limits).")
exitOnFailure := flag.Bool("exit-on-failure", false, "exit tester on first failure")
stressQPS := flag.Int("stress-qps", 10000, "maximum number of stresser requests per second.")
schedCases := flag.String("schedule-cases", "", "test case schedule")
consistencyCheck := flag.Bool("consistency-check", true, "true to check consistency (revision, hash)")
stresserType := flag.String("stresser", "keys,lease", "comma separated list of stressers (keys, lease, v2keys, nop, election-runner, watch-runner, lock-racer-runner, lease-runner).")
etcdRunnerPath := flag.String("etcd-runner", "", "specify a path of etcd runner binary")
failureTypes := flag.String("failures", "default,failpoints", "specify failures (concat of \"default\" and \"failpoints\").")
failpoints := flag.String("failpoints", `panic("etcd-tester")`, `comma separated list of failpoint terms to inject (e.g. 'panic("etcd-tester"),1*sleep(1000)')`)
externalFailures := flag.String("external-failures", "", "specify a path of script for enabling/disabling an external fault injector")
enablePprof := flag.Bool("enable-pprof", false, "true to enable pprof")
flag.Parse()
// to discard gRPC-side balancer logs
grpclog.SetLoggerV2(grpclog.NewLoggerV2(ioutil.Discard, ioutil.Discard, ioutil.Discard))
eps := strings.Split(*endpointStr, ",")
cports := portsFromArg(*clientPorts, len(eps), defaultClientPort)
acports := portsFromArg(*advertiseClientPorts, len(eps), defaultClientPort)
pports := portsFromArg(*peerPorts, len(eps), defaultPeerPort)
apports := portsFromArg(*advertisePeerPorts, len(eps), defaultPeerPort)
fports := portsFromArg(*failpointPorts, len(eps), defaultFailpointPort)
agents := make([]agentConfig, len(eps))
for i := range eps {
agents[i].endpoint = eps[i]
agents[i].clientPort = cports[i]
agents[i].advertiseClientPort = acports[i]
agents[i].peerPort = pports[i]
agents[i].advertisePeerPort = apports[i]
agents[i].failpointPort = fports[i]
}
c := &cluster{agents: agents}
if err := c.bootstrap(); err != nil {
plog.Fatal(err)
}
defer c.Terminate()
// ensure cluster is fully booted to know failpoints are available
c.WaitHealth()
var failures []failure
if failureTypes != nil && *failureTypes != "" {
types, failpoints := strings.Split(*failureTypes, ","), strings.Split(*failpoints, ",")
failures = makeFailures(types, failpoints, c)
}
if externalFailures != nil && *externalFailures != "" {
if len(failures) != 0 {
plog.Errorf("specify only one of -failures or -external-failures")
os.Exit(1)
}
failures = append(failures, newFailureExternal(*externalFailures))
}
if len(failures) == 0 {
plog.Infof("no failures\n")
failures = append(failures, newFailureNop())
}
schedule := failures
if schedCases != nil && *schedCases != "" {
cases := strings.Split(*schedCases, " ")
schedule = make([]failure, len(cases))
for i := range cases {
caseNum := 0
n, err := fmt.Sscanf(cases[i], "%d", &caseNum)
if n == 0 || err != nil {
plog.Fatalf(`couldn't parse case "%s" (%v)`, cases[i], err)
}
schedule[i] = failures[caseNum]
}
}
scfg := stressConfig{
rateLimiter: rate.NewLimiter(rate.Limit(*stressQPS), *stressQPS),
keyLargeSize: int(*stressKeyLargeSize),
keySize: int(*stressKeySize),
keySuffixRange: int(*stressKeySuffixRange),
keyTxnSuffixRange: int(*stressKeyTxnSuffixRange),
keyTxnOps: int(*stressKeyTxnOps),
numLeases: 10,
keysPerLease: 10,
etcdRunnerPath: *etcdRunnerPath,
}
if scfg.keyTxnSuffixRange > 100 {
plog.Fatalf("stress-key-txn-count is maximum 100, got %d", scfg.keyTxnSuffixRange)
}
if scfg.keyTxnOps > 64 {
plog.Fatalf("stress-key-txn-ops is maximum 64, got %d", scfg.keyTxnOps)
}
t := &tester{
failures: schedule,
cluster: c,
limit: *limit,
exitOnFailure: *exitOnFailure,
scfg: scfg,
stresserType: *stresserType,
doChecks: *consistencyCheck,
}
sh := statusHandler{status: &t.status}
http.Handle("/status", sh)
http.Handle("/metrics", promhttp.Handler())
if *enablePprof {
for p, h := range debugutil.PProfHandlers() {
http.Handle(p, h)
}
}
go func() { plog.Fatal(http.ListenAndServe(":9028", nil)) }()
t.runLoop()
}
// portsFromArg converts a comma-separated list into a slice of ints
func portsFromArg(arg string, n, defaultPort int) []int {
ret := make([]int, n)
if len(arg) == 0 {
for i := range ret {
ret[i] = defaultPort
}
return ret
}
s := strings.Split(arg, ",")
if len(s) != n {
fmt.Printf("expected %d ports, got %d (%s)\n", n, len(s), arg)
os.Exit(1)
}
for i := range s {
if _, err := fmt.Sscanf(s[i], "%d", &ret[i]); err != nil {
fmt.Println(err)
os.Exit(1)
}
}
return ret
}
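// For example (hypothetical invocations):
//
//	portsFromArg("", 3, 2379)                 // -> [2379 2379 2379]
//	portsFromArg("2379,12379,22379", 3, 2379) // -> [2379 12379 22379]
//
// A count mismatch or unparsable entry exits the process instead of returning
// an error, since this runs only during flag parsing at startup.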
func makeFailures(types, failpoints []string, c *cluster) []failure {
var failures []failure
for i := range types {
switch types[i] {
case "default":
defaultFailures := []failure{
newFailureKillAll(),
newFailureKillMajority(),
newFailureKillOne(),
newFailureKillLeader(),
newFailureKillOneForLongTime(),
newFailureKillLeaderForLongTime(),
newFailureIsolate(),
newFailureIsolateAll(),
newFailureSlowNetworkOneMember(),
newFailureSlowNetworkLeader(),
newFailureSlowNetworkAll(),
}
failures = append(failures, defaultFailures...)
case "failpoints":
fpFailures, fperr := failpointFailures(c, failpoints)
if len(fpFailures) == 0 {
plog.Infof("no failpoints found (%v)", fperr)
}
failures = append(failures, fpFailures...)
default:
plog.Errorf("unknown failure: %s\n", types[i])
os.Exit(1)
}
}
return failures
}

View File

@ -1,188 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"context"
"fmt"
"net"
"net/url"
"time"
"github.com/coreos/etcd/clientv3"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
"google.golang.org/grpc"
)
type member struct {
Agent client.Agent
Endpoint string
Name string
ClientURL string
AdvertiseClientURL string
PeerURL string
AdvertisePeerURL string
FailpointURL string
}
func (m *member) ClusterEntry() string { return m.Name + "=" + m.AdvertisePeerURL }
func (m *member) Flags() []string {
return []string{
"--name", m.Name,
"--listen-client-urls", m.ClientURL,
"--advertise-client-urls", m.AdvertiseClientURL,
"--listen-peer-urls", m.PeerURL,
"--initial-advertise-peer-urls", m.AdvertisePeerURL,
"--initial-cluster-state", "new",
"--experimental-initial-corrupt-check",
}
}
func (m *member) CheckCompact(rev int64) error {
cli, err := m.newClientV3()
if err != nil {
return fmt.Errorf("%v (endpoint %s)", err, m.AdvertiseClientURL)
}
defer cli.Close()
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
wch := cli.Watch(ctx, "\x00", clientv3.WithFromKey(), clientv3.WithRev(rev-1))
wr, ok := <-wch
cancel()
if !ok {
return fmt.Errorf("watch channel terminated (endpoint %s)", m.AdvertiseClientURL)
}
if wr.CompactRevision != rev {
return fmt.Errorf("got compact revision %v, wanted %v (endpoint %s)", wr.CompactRevision, rev, m.AdvertiseClientURL)
}
return nil
}
func (m *member) Defrag() error {
plog.Printf("defragmenting %s\n", m.AdvertiseClientURL)
cli, err := m.newClientV3()
if err != nil {
return err
}
defer cli.Close()
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
_, err = cli.Defragment(ctx, m.AdvertiseClientURL)
cancel()
if err != nil {
return err
}
plog.Printf("defragmented %s\n", m.AdvertiseClientURL)
return nil
}
func (m *member) RevHash() (int64, int64, error) {
conn, err := m.dialGRPC()
if err != nil {
return 0, 0, err
}
mt := pb.NewMaintenanceClient(conn)
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
resp, err := mt.Hash(ctx, &pb.HashRequest{}, grpc.FailFast(false))
cancel()
conn.Close()
if err != nil {
return 0, 0, err
}
return resp.Header.Revision, int64(resp.Hash), nil
}
func (m *member) Rev(ctx context.Context) (int64, error) {
cli, err := m.newClientV3()
if err != nil {
return 0, err
}
defer cli.Close()
resp, err := cli.Status(ctx, m.AdvertiseClientURL)
if err != nil {
return 0, err
}
return resp.Header.Revision, nil
}
func (m *member) IsLeader() (bool, error) {
cli, err := m.newClientV3()
if err != nil {
return false, err
}
defer cli.Close()
resp, err := cli.Status(context.Background(), m.AdvertiseClientURL)
if err != nil {
return false, err
}
return resp.Header.MemberId == resp.Leader, nil
}
func (m *member) SetHealthKeyV3() error {
cli, err := m.newClientV3()
if err != nil {
return fmt.Errorf("%v (%s)", err, m.AdvertiseClientURL)
}
defer cli.Close()
// give a generous timeout in case expensive requests (range/delete) are pending
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
_, err = cli.Put(ctx, "health", "good")
cancel()
if err != nil {
return fmt.Errorf("%v (%s)", err, m.AdvertiseClientURL)
}
return nil
}
func (m *member) newClientV3() (*clientv3.Client, error) {
return clientv3.New(clientv3.Config{
Endpoints: []string{m.AdvertiseClientURL},
DialTimeout: 5 * time.Second,
})
}
func (m *member) dialGRPC() (*grpc.ClientConn, error) {
return grpc.Dial(m.grpcAddr(), grpc.WithInsecure(), grpc.WithTimeout(5*time.Second), grpc.WithBlock())
}
// grpcAddr gets the host from clientURL so it works with grpc.Dial()
func (m *member) grpcAddr() string {
u, err := url.Parse(m.AdvertiseClientURL)
if err != nil {
panic(err)
}
return u.Host
}
func (m *member) peerPort() (port int) {
u, err := url.Parse(m.AdvertisePeerURL)
if err != nil {
panic(err)
}
_, portStr, err := net.SplitHostPort(u.Host)
if err != nil {
panic(err)
}
if _, err = fmt.Sscanf(portStr, "%d", &port); err != nil {
panic(err)
}
return port
}

View File

@ -1,64 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"github.com/prometheus/client_golang/prometheus"
)
var (
caseTotalCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "funcational_tester",
Name: "case_total",
Help: "Total number of finished test cases",
},
[]string{"desc"},
)
caseFailedTotalCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "funcational_tester",
Name: "case_failed_total",
Help: "Total number of failed test cases",
},
[]string{"desc"},
)
roundTotalCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "funcational_tester",
Name: "round_total",
Help: "Total number of finished test rounds.",
})
roundFailedTotalCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "funcational_tester",
Name: "round_failed_total",
Help: "Total number of failed test rounds.",
})
)
func init() {
prometheus.MustRegister(caseTotalCounter)
prometheus.MustRegister(caseFailedTotalCounter)
prometheus.MustRegister(roundTotalCounter)
prometheus.MustRegister(roundFailedTotalCounter)
}
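// The case counters are labeled by failure description; a round loop would
// bump them along the lines of (illustrative):
//
//	caseTotalCounter.WithLabelValues(f.Desc()).Inc()
//	if caseFailed {
//		caseFailedTotalCounter.WithLabelValues(f.Desc()).Inc()
//	}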

View File

@ -1,57 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"sync"
"time"
)
type Status struct {
Since time.Time
Failures []string
RoundLimit int
Cluster ClusterStatus
cluster *cluster
mu sync.Mutex // guards Round and Case
Round int
Case int
}
func (s *Status) setRound(r int) {
s.mu.Lock()
defer s.mu.Unlock()
s.Round = r
}
func (s *Status) getRound() int {
s.mu.Lock()
defer s.mu.Unlock()
return s.Round
}
func (s *Status) setCase(c int) {
s.mu.Lock()
defer s.mu.Unlock()
s.Case = c
}
func (s *Status) getCase() int {
s.mu.Lock()
defer s.mu.Unlock()
return s.Case
}

View File

@ -1,218 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"strings"
"sync"
"time"
"golang.org/x/time/rate"
)
type Stresser interface {
// Stress starts to stress the etcd cluster
Stress() error
// Pause stops the stresser from sending requests to etcd. Resume by calling Stress.
Pause()
// Close releases all of the Stresser's resources.
Close()
// ModifiedKeys reports the number of keys created and deleted by the stresser
ModifiedKeys() int64
// Checker returns an invariant checker to run after the stresser is canceled.
Checker() Checker
}
// nopStresser implements a Stresser that does nothing
type nopStresser struct {
start time.Time
qps int
}
func (s *nopStresser) Stress() error { return nil }
func (s *nopStresser) Pause() {}
func (s *nopStresser) Close() {}
func (s *nopStresser) ModifiedKeys() int64 {
return 0
}
func (s *nopStresser) Checker() Checker { return nil }
// compositeStresser implements a Stresser that runs a slice of
// stressers concurrently.
type compositeStresser struct {
stressers []Stresser
}
func (cs *compositeStresser) Stress() error {
for i, s := range cs.stressers {
if err := s.Stress(); err != nil {
for j := 0; j < i; j++ {
cs.stressers[i].Close()
}
return err
}
}
return nil
}
func (cs *compositeStresser) Pause() {
var wg sync.WaitGroup
wg.Add(len(cs.stressers))
for i := range cs.stressers {
go func(s Stresser) {
defer wg.Done()
s.Pause()
}(cs.stressers[i])
}
wg.Wait()
}
func (cs *compositeStresser) Close() {
var wg sync.WaitGroup
wg.Add(len(cs.stressers))
for i := range cs.stressers {
go func(s Stresser) {
defer wg.Done()
s.Close()
}(cs.stressers[i])
}
wg.Wait()
}
func (cs *compositeStresser) ModifiedKeys() (modifiedKey int64) {
for _, stress := range cs.stressers {
modifiedKey += stress.ModifiedKeys()
}
return modifiedKey
}
func (cs *compositeStresser) Checker() Checker {
var chks []Checker
for _, s := range cs.stressers {
if chk := s.Checker(); chk != nil {
chks = append(chks, chk)
}
}
if len(chks) == 0 {
return nil
}
return newCompositeChecker(chks)
}
type stressConfig struct {
keyLargeSize int
keySize int
keySuffixRange int
keyTxnSuffixRange int
keyTxnOps int
numLeases int
keysPerLease int
rateLimiter *rate.Limiter
etcdRunnerPath string
}
// NewStresser creates a stresser from a comma-separated list of stresser types.
func NewStresser(s string, sc *stressConfig, m *member) Stresser {
types := strings.Split(s, ",")
if len(types) > 1 {
stressers := make([]Stresser, len(types))
for i, stype := range types {
stressers[i] = NewStresser(stype, sc, m)
}
return &compositeStresser{stressers}
}
switch s {
case "nop":
return &nopStresser{start: time.Now(), qps: int(sc.rateLimiter.Limit())}
case "keys":
// TODO: Overly intensive stressers can panic an etcd member with an
// 'out of memory' error. Add rate limits on the server side.
return &keyStresser{
Endpoint: m.grpcAddr(),
keyLargeSize: sc.keyLargeSize,
keySize: sc.keySize,
keySuffixRange: sc.keySuffixRange,
keyTxnSuffixRange: sc.keyTxnSuffixRange,
keyTxnOps: sc.keyTxnOps,
N: 100,
rateLimiter: sc.rateLimiter,
}
case "v2keys":
return &v2Stresser{
Endpoint: m.ClientURL,
keySize: sc.keySize,
keySuffixRange: sc.keySuffixRange,
N: 100,
rateLimiter: sc.rateLimiter,
}
case "lease":
return &leaseStresser{
endpoint: m.grpcAddr(),
numLeases: sc.numLeases,
keysPerLease: sc.keysPerLease,
rateLimiter: sc.rateLimiter,
}
case "election-runner":
reqRate := 100
args := []string{
"election",
fmt.Sprintf("%v", time.Now().UnixNano()), // election name as current nano time
"--dial-timeout=10s",
"--endpoints", m.grpcAddr(),
"--total-client-connections=10",
"--rounds=0", // runs forever
"--req-rate", fmt.Sprintf("%v", reqRate),
}
return newRunnerStresser(sc.etcdRunnerPath, args, sc.rateLimiter, reqRate)
case "watch-runner":
reqRate := 100
args := []string{
"watcher",
"--prefix", fmt.Sprintf("%v", time.Now().UnixNano()), // prefix all keys with nano time
"--total-keys=1",
"--total-prefixes=1",
"--watch-per-prefix=1",
"--endpoints", m.grpcAddr(),
"--rounds=0", // runs forever
"--req-rate", fmt.Sprintf("%v", reqRate),
}
return newRunnerStresser(sc.etcdRunnerPath, args, sc.rateLimiter, reqRate)
case "lock-racer-runner":
reqRate := 100
args := []string{
"lock-racer",
fmt.Sprintf("%v", time.Now().UnixNano()), // locker name as current nano time
"--endpoints", m.grpcAddr(),
"--total-client-connections=10",
"--rounds=0", // runs forever
"--req-rate", fmt.Sprintf("%v", reqRate),
}
return newRunnerStresser(sc.etcdRunnerPath, args, sc.rateLimiter, reqRate)
case "lease-runner":
args := []string{
"lease-renewer",
"--ttl=30",
"--endpoints", m.grpcAddr(),
}
return newRunnerStresser(sc.etcdRunnerPath, args, sc.rateLimiter, 0)
default:
plog.Panicf("unknown stresser type: %s\n", s)
}
return nil // never reach here
}
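
A hedged usage sketch, not part of the original file: how a caller inside this package might compose two stressers for a cluster member `m`. The limiter rate and field values here are illustrative assumptions.

```go
// sketch only: assumes the package-internal member/stressConfig types above
limiter := rate.NewLimiter(rate.Limit(2500), 2500) // hypothetical 2500 QPS cap
sc := &stressConfig{
	keySize:        100,
	keySuffixRange: 250000,
	numLeases:      10,
	keysPerLease:   10,
	rateLimiter:    limiter,
}
s := NewStresser("keys,lease", sc, m) // yields a compositeStresser of two sub-stressers
if err := s.Stress(); err != nil {
	plog.Errorf("failed to start stressers (%v)", err)
}
defer s.Close()
```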

View File

@ -1,286 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"os"
"time"
)
type tester struct {
cluster *cluster
limit int
exitOnFailure bool
failures []failure
status Status
currentRevision int64
stresserType string
scfg stressConfig
doChecks bool
stresser Stresser
checker Checker
}
// compactQPS is a rough number of compacted entries per second.
// Previous tests showed etcd can compact about 60,000 entries per second.
const compactQPS = 50000
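// Example (see the timeout computation in runLoop): with 1,500,000
// modifications since the previous compaction, the timeout would be
// 10s + (1500000/50000)s = 40s.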
func (tt *tester) runLoop() {
tt.status.Since = time.Now()
tt.status.RoundLimit = tt.limit
tt.status.cluster = tt.cluster
for _, f := range tt.failures {
tt.status.Failures = append(tt.status.Failures, f.Desc())
}
if err := tt.resetStressCheck(); err != nil {
plog.Errorf("%s failed to start stresser (%v)", tt.logPrefix(), err)
tt.failed()
return
}
var preModifiedKey int64
for round := 0; round < tt.limit || tt.limit == -1; round++ {
tt.status.setRound(round)
roundTotalCounter.Inc()
if err := tt.doRound(round); err != nil {
plog.Warningf("%s functional-tester returning with error (%v)", tt.logPrefix(), err)
if tt.cleanup() != nil {
return
}
// reset preModifiedKey after clean up
preModifiedKey = 0
continue
}
// -1 so that logPrefix doesn't print out 'case'
tt.status.setCase(-1)
revToCompact := max(0, tt.currentRevision-10000)
currentModifiedKey := tt.stresser.ModifiedKeys()
modifiedKey := currentModifiedKey - preModifiedKey
preModifiedKey = currentModifiedKey
timeout := 10 * time.Second
timeout += time.Duration(modifiedKey/compactQPS) * time.Second
plog.Infof("%s compacting %d modifications (timeout %v)", tt.logPrefix(), modifiedKey, timeout)
if err := tt.compact(revToCompact, timeout); err != nil {
plog.Warningf("%s functional-tester compact got error (%v)", tt.logPrefix(), err)
if tt.cleanup() != nil {
return
}
// reset preModifiedKey after clean up
preModifiedKey = 0
}
if round > 0 && round%500 == 0 { // every 500 rounds
if err := tt.defrag(); err != nil {
plog.Warningf("%s functional-tester returning with error (%v)", tt.logPrefix(), err)
tt.failed()
return
}
}
}
plog.Infof("%s functional-tester is finished", tt.logPrefix())
}
func (tt *tester) doRound(round int) error {
for j, f := range tt.failures {
caseTotalCounter.WithLabelValues(f.Desc()).Inc()
tt.status.setCase(j)
if err := tt.cluster.WaitHealth(); err != nil {
return fmt.Errorf("wait full health error: %v", err)
}
plog.Infof("%s injecting failure %q", tt.logPrefix(), f.Desc())
if err := f.Inject(tt.cluster, round); err != nil {
return fmt.Errorf("injection error: %v", err)
}
plog.Infof("%s injected failure", tt.logPrefix())
plog.Infof("%s recovering failure %q", tt.logPrefix(), f.Desc())
if err := f.Recover(tt.cluster, round); err != nil {
return fmt.Errorf("recovery error: %v", err)
}
plog.Infof("%s recovered failure", tt.logPrefix())
tt.pauseStresser()
plog.Infof("%s wait until cluster is healthy", tt.logPrefix())
if err := tt.cluster.WaitHealth(); err != nil {
return fmt.Errorf("wait full health error: %v", err)
}
plog.Infof("%s cluster is healthy", tt.logPrefix())
plog.Infof("%s checking consistency and invariant of cluster", tt.logPrefix())
if err := tt.checkConsistency(); err != nil {
return fmt.Errorf("tt.checkConsistency error (%v)", err)
}
plog.Infof("%s checking consistency and invariant of cluster done", tt.logPrefix())
plog.Infof("%s succeed!", tt.logPrefix())
}
return nil
}
func (tt *tester) updateRevision() error {
revs, _, err := tt.cluster.getRevisionHash()
for _, rev := range revs {
tt.currentRevision = rev
break // just need to get one of the current revisions
}
plog.Infof("%s updated current revision to %d", tt.logPrefix(), tt.currentRevision)
return err
}
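// checkConsistency runs the invariant checkers; on success, the deferred
// block refreshes the current revision and restarts the stressers.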
func (tt *tester) checkConsistency() (err error) {
defer func() {
if err != nil {
return
}
if err = tt.updateRevision(); err != nil {
plog.Warningf("%s functional-tester returning with tt.updateRevision error (%v)", tt.logPrefix(), err)
return
}
err = tt.startStresser()
}()
if err = tt.checker.Check(); err != nil {
plog.Infof("%s %v", tt.logPrefix(), err)
}
return err
}
func (tt *tester) compact(rev int64, timeout time.Duration) (err error) {
tt.pauseStresser()
defer func() {
if err == nil {
err = tt.startStresser()
}
}()
plog.Infof("%s compacting storage (current revision %d, compact revision %d)", tt.logPrefix(), tt.currentRevision, rev)
if err = tt.cluster.compactKV(rev, timeout); err != nil {
return err
}
plog.Infof("%s compacted storage (compact revision %d)", tt.logPrefix(), rev)
plog.Infof("%s checking compaction (compact revision %d)", tt.logPrefix(), rev)
if err = tt.cluster.checkCompact(rev); err != nil {
plog.Warningf("%s checkCompact error (%v)", tt.logPrefix(), err)
return err
}
plog.Infof("%s confirmed compaction (compact revision %d)", tt.logPrefix(), rev)
return nil
}
func (tt *tester) defrag() error {
plog.Infof("%s defragmenting...", tt.logPrefix())
if err := tt.cluster.defrag(); err != nil {
plog.Warningf("%s defrag error (%v)", tt.logPrefix(), err)
if cerr := tt.cleanup(); cerr != nil {
return fmt.Errorf("%s, %s", err, cerr)
}
return err
}
plog.Infof("%s defragmented...", tt.logPrefix())
return nil
}
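// logPrefix formats the current position, e.g. "[round#3 case#2]",
// or just "[round#3]" when no case is active (case == -1).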
func (tt *tester) logPrefix() string {
var (
rd = tt.status.getRound()
cs = tt.status.getCase()
prefix = fmt.Sprintf("[round#%d case#%d]", rd, cs)
)
if cs == -1 {
prefix = fmt.Sprintf("[round#%d]", rd)
}
return prefix
}
func (tt *tester) failed() {
if !tt.exitOnFailure {
return
}
plog.Warningf("%s exiting on failure", tt.logPrefix())
tt.cluster.Terminate()
os.Exit(2)
}
func (tt *tester) cleanup() error {
defer tt.failed()
roundFailedTotalCounter.Inc()
desc := "compact/defrag"
if tt.status.Case != -1 {
desc = tt.failures[tt.status.Case].Desc()
}
caseFailedTotalCounter.WithLabelValues(desc).Inc()
tt.closeStresser()
if err := tt.cluster.Cleanup(); err != nil {
plog.Warningf("%s cleanup error: %v", tt.logPrefix(), err)
return err
}
if err := tt.cluster.Reset(); err != nil {
plog.Warningf("%s cleanup Bootstrap error: %v", tt.logPrefix(), err)
return err
}
return tt.resetStressCheck()
}
func (tt *tester) pauseStresser() {
plog.Infof("%s pausing the stressers...", tt.logPrefix())
tt.stresser.Pause()
plog.Infof("%s paused stressers", tt.logPrefix())
}
func (tt *tester) startStresser() (err error) {
plog.Infof("%s starting the stressers...", tt.logPrefix())
err = tt.stresser.Stress()
plog.Infof("%s started stressers", tt.logPrefix())
return err
}
func (tt *tester) closeStresser() {
plog.Infof("%s closing the stressers...", tt.logPrefix())
tt.stresser.Close()
plog.Infof("%s closed stressers", tt.logPrefix())
}
func (tt *tester) resetStressCheck() error {
plog.Infof("%s resetting stressers and checkers...", tt.logPrefix())
cs := &compositeStresser{}
for _, m := range tt.cluster.Members {
s := NewStresser(tt.stresserType, &tt.scfg, m)
cs.stressers = append(cs.stressers, s)
}
tt.stresser = cs
if !tt.doChecks {
tt.checker = newNoChecker()
return tt.startStresser()
}
chk := newHashChecker(hashAndRevGetter(tt.cluster))
if schk := cs.Checker(); schk != nil {
chk = newCompositeChecker([]Checker{chk, schk})
}
tt.checker = chk
return tt.startStresser()
}
func (tt *tester) Report() int64 { return tt.stresser.ModifiedKeys() }

View File

@ -1,51 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"strings"
)
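// getSameValue reports whether all values in vals are equal; a zero value is
// treated as not-yet-set, so zeros are skipped when picking the reference value.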
func getSameValue(vals map[string]int64) bool {
var rv int64
for _, v := range vals {
if rv == 0 {
rv = v
}
if rv != v {
return false
}
}
return true
}
func max(n1, n2 int64) int64 {
if n1 > n2 {
return n1
}
return n2
}
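// errsToError flattens a slice of errors into a single comma-separated error,
// e.g. []error{errA, errB} yields "errA, errB"; an empty slice yields nil.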
func errsToError(errs []error) error {
if len(errs) == 0 {
return nil
}
stringArr := make([]string, len(errs))
for i, err := range errs {
stringArr[i] = err.Error()
}
return errors.New(strings.Join(stringArr, ", "))
}

View File

@ -1,117 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"context"
"fmt"
"math/rand"
"net"
"net/http"
"sync"
"sync/atomic"
"time"
"golang.org/x/time/rate"
clientV2 "github.com/coreos/etcd/client"
)
type v2Stresser struct {
Endpoint string
keySize int
keySuffixRange int
N int
rateLimiter *rate.Limiter
wg sync.WaitGroup
atomicModifiedKey int64
cancel func()
}
func (s *v2Stresser) Stress() error {
cfg := clientV2.Config{
Endpoints: []string{s.Endpoint},
Transport: &http.Transport{
Dial: (&net.Dialer{
Timeout: time.Second,
KeepAlive: 30 * time.Second,
}).Dial,
MaxIdleConnsPerHost: s.N,
},
}
c, err := clientV2.New(cfg)
if err != nil {
return err
}
kv := clientV2.NewKeysAPI(c)
ctx, cancel := context.WithCancel(context.Background())
s.cancel = cancel
s.wg.Add(s.N)
for i := 0; i < s.N; i++ {
go func() {
defer s.wg.Done()
s.run(ctx, kv)
}()
}
return nil
}
func (s *v2Stresser) run(ctx context.Context, kv clientV2.KeysAPI) {
for {
if err := s.rateLimiter.Wait(ctx); err == context.Canceled {
return
}
setctx, setcancel := context.WithTimeout(ctx, clientV2.DefaultRequestTimeout)
key := fmt.Sprintf("foo%016x", rand.Intn(s.keySuffixRange))
_, err := kv.Set(setctx, key, string(randBytes(s.keySize)), nil)
if err == nil {
atomic.AddInt64(&s.atomicModifiedKey, 1)
}
setcancel()
if err == context.Canceled {
return
}
}
}
func (s *v2Stresser) Pause() {
s.cancel()
s.wg.Wait()
}
func (s *v2Stresser) Close() {
s.Pause()
}
func (s *v2Stresser) ModifiedKeys() int64 {
return atomic.LoadInt64(&s.atomicModifiedKey)
}
func (s *v2Stresser) Checker() Checker { return nil }
func randBytes(size int) []byte {
data := make([]byte, size)
for i := 0; i < size; i++ {
data[i] = byte(int('a') + rand.Intn(26))
}
return data
}

View File

@ -1,37 +0,0 @@
#!/usr/bin/env bash
<<COMMENT
# to run agent
./scripts/agent-1.sh
# to run with failpoints
ETCD_EXEC_PATH=/etcd-failpoints ./scripts/agent-1.sh
COMMENT
if ! [[ "$0" =~ "scripts/agent-1.sh" ]]; then
echo "must be run from tools/functional-tester"
exit 255
fi
if [ -z "${ETCD_EXEC_PATH}" ]; then
ETCD_EXEC_PATH=/etcd
echo "Running agent without failpoints:" ${ETCD_EXEC_PATH}
elif [[ "${ETCD_EXEC_PATH}" == "/etcd-failpoints" ]]; then
echo "Running agent with failpoints:" ${ETCD_EXEC_PATH}
else
echo "Cannot find executable:" ${ETCD_EXEC_PATH}
exit 255
fi
rm -rf `pwd`/agent-1 && mkdir -p `pwd`/agent-1
docker run \
--rm \
--net=host \
--name agent-1 \
--mount type=bind,source=`pwd`/agent-1,destination=/agent-1 \
gcr.io/etcd-development/etcd-functional-tester:go1.9.3 \
/bin/bash -c "/etcd-agent \
--etcd-path ${ETCD_EXEC_PATH} \
--etcd-log-dir /agent-1 \
--port :19027 \
--failpoint-addr :7381"

View File

@ -1,37 +0,0 @@
#!/usr/bin/env bash
<<COMMENT
# to run agent
./scripts/agent-2.sh
# to run with failpoints
ETCD_EXEC_PATH=/etcd-failpoints ./scripts/agent-2.sh
COMMENT
if ! [[ "$0" =~ "scripts/agent-2.sh" ]]; then
echo "must be run from tools/functional-tester"
exit 255
fi
if [ -z "${ETCD_EXEC_PATH}" ]; then
ETCD_EXEC_PATH=/etcd
echo "Running agent without failpoints:" ${ETCD_EXEC_PATH}
elif [[ "${ETCD_EXEC_PATH}" == "/etcd-failpoints" ]]; then
echo "Running agent with failpoints:" ${ETCD_EXEC_PATH}
else
echo "Cannot find executable:" ${ETCD_EXEC_PATH}
exit 255
fi
rm -rf `pwd`/agent-2 && mkdir -p `pwd`/agent-2
docker run \
--rm \
--net=host \
--name agent-2 \
--mount type=bind,source=`pwd`/agent-2,destination=/agent-2 \
gcr.io/etcd-development/etcd-functional-tester:go1.9.3 \
/bin/bash -c "/etcd-agent \
--etcd-path ${ETCD_EXEC_PATH} \
--etcd-log-dir /agent-2 \
--port :29027 \
--failpoint-addr :7382"

View File

@ -1,37 +0,0 @@
#!/usr/bin/env bash
<<COMMENT
# to run agent
./scripts/agent-3.sh
# to run with failpoints
ETCD_EXEC_PATH=/etcd-failpoints ./scripts/agent-3.sh
COMMENT
if ! [[ "$0" =~ "scripts/agent-3.sh" ]]; then
echo "must be run from tools/functional-tester"
exit 255
fi
if [ -z "${ETCD_EXEC_PATH}" ]; then
ETCD_EXEC_PATH=/etcd
echo "Running agent without failpoints:" ${ETCD_EXEC_PATH}
elif [[ "${ETCD_EXEC_PATH}" == "/etcd-failpoints" ]]; then
echo "Running agent with failpoints:" ${ETCD_EXEC_PATH}
else
echo "Cannot find executable:" ${ETCD_EXEC_PATH}
exit 255
fi
rm -rf `pwd`/agent-3 && mkdir -p `pwd`/agent-3
docker run \
--rm \
--net=host \
--name agent-3 \
--mount type=bind,source=`pwd`/agent-3,destination=/agent-3 \
gcr.io/etcd-development/etcd-functional-tester:go1.9.3 \
/bin/bash -c "/etcd-agent \
--etcd-path ${ETCD_EXEC_PATH} \
--etcd-log-dir /agent-3 \
--port :39027 \
--failpoint-addr :7383"

View File

@ -1,24 +0,0 @@
#!/usr/bin/env bash
if ! [[ "$0" =~ "scripts/tester-limit.sh" ]]; then
echo "must be run from tools/functional-tester"
exit 255
fi
# to run only 1 test round
docker run \
--rm \
--net=host \
--name tester \
gcr.io/etcd-development/etcd-functional-tester:go1.9.3 \
/bin/bash -c "/etcd-tester \
--agent-endpoints '127.0.0.1:19027,127.0.0.1:29027,127.0.0.1:39027' \
--client-ports 1379,2379,3379 \
--advertise-client-ports 13790,23790,33790 \
--peer-ports 1380,2380,3380 \
--advertise-peer-ports 13800,23800,33800 \
--limit 1 \
--stress-qps=2500 \
--stress-key-txn-count 100 \
--stress-key-txn-ops 10 \
--exit-on-failure"

View File

@ -1,25 +0,0 @@
#!/usr/bin/env bash
if ! [[ "$0" =~ "scripts/tester-runner.sh" ]]; then
echo "must be run from tools/functional-tester"
exit 255
fi
# to run with etcd-runner
docker run \
--rm \
--net=host \
--name tester \
gcr.io/etcd-development/etcd-functional-tester:go1.9.3 \
/bin/bash -c "/etcd-tester \
--agent-endpoints '127.0.0.1:19027,127.0.0.1:29027,127.0.0.1:39027' \
--client-ports 1379,2379,3379 \
--advertise-client-ports 13790,23790,33790 \
--peer-ports 1380,2380,3380 \
--advertise-peer-ports 13800,23800,33800 \
--stress-qps=2500 \
--stress-key-txn-count 100 \
--stress-key-txn-ops 10 \
--etcd-runner /etcd-runner \
--stresser=keys,lease,election-runner,watch-runner,lock-racer-runner,lease-runner \
--exit-on-failure"

View File

@ -1,22 +0,0 @@
#!/usr/bin/env bash
if ! [[ "$0" =~ "scripts/tester.sh" ]]; then
echo "must be run from tools/functional-tester"
exit 255
fi
docker run \
--rm \
--net=host \
--name tester \
gcr.io/etcd-development/etcd-functional-tester:go1.9.3 \
/bin/bash -c "/etcd-tester \
--agent-endpoints '127.0.0.1:19027,127.0.0.1:29027,127.0.0.1:39027' \
--client-ports 1379,2379,3379 \
--advertise-client-ports 13790,23790,33790 \
--peer-ports 1380,2380,3380 \
--advertise-peer-ports 13800,23800,33800 \
--stress-qps=2500 \
--stress-key-txn-count 100 \
--stress-key-txn-ops 10 \
--exit-on-failure"

View File

@ -1,21 +0,0 @@
# Run with goreman (install with `go get github.com/mattn/goreman`)
# peer bridges
pbridge1: tools/local-tester/bridge.sh 127.0.0.1:11111 127.0.0.1:12380
pbridge2: tools/local-tester/bridge.sh 127.0.0.1:22222 127.0.0.1:22380
pbridge3: tools/local-tester/bridge.sh 127.0.0.1:33333 127.0.0.1:32380
# client bridges
cbridge1: tools/local-tester/bridge.sh 127.0.0.1:2379 127.0.0.1:11119
cbridge2: tools/local-tester/bridge.sh 127.0.0.1:22379 127.0.0.1:22229
cbridge3: tools/local-tester/bridge.sh 127.0.0.1:32379 127.0.0.1:33339
faults: tools/local-tester/faults.sh
stress-put: tools/benchmark/benchmark --endpoints=127.0.0.1:2379,127.0.0.1:22379,127.0.0.1:32379 --clients=27 --conns=3 put --sequential-keys --key-space-size=100000 --total=100000
etcd1: GOFAIL_HTTP="127.0.0.1:11180" bin/etcd --name infra1 --snapshot-count=1000 --listen-client-urls http://127.0.0.1:11119 --advertise-client-urls http://127.0.0.1:2379 --listen-peer-urls http://127.0.0.1:12380 --initial-advertise-peer-urls http://127.0.0.1:11111 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:11111,infra2=http://127.0.0.1:22222,infra3=http://127.0.0.1:33333' --initial-cluster-state new --enable-pprof
etcd2: GOFAIL_HTTP="127.0.0.1:22280" bin/etcd --name infra2 --snapshot-count=1000 --listen-client-urls http://127.0.0.1:22229 --advertise-client-urls http://127.0.0.1:22379 --listen-peer-urls http://127.0.0.1:22380 --initial-advertise-peer-urls http://127.0.0.1:22222 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:11111,infra2=http://127.0.0.1:22222,infra3=http://127.0.0.1:33333' --initial-cluster-state new --enable-pprof
etcd3: GOFAIL_HTTP="127.0.0.1:33380" bin/etcd --name infra3 --snapshot-count=1000 --listen-client-urls http://127.0.0.1:33339 --advertise-client-urls http://127.0.0.1:32379 --listen-peer-urls http://127.0.0.1:32380 --initial-advertise-peer-urls http://127.0.0.1:33333 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:11111,infra2=http://127.0.0.1:22222,infra3=http://127.0.0.1:33333' --initial-cluster-state new --enable-pprof
# in future, use proxy to listen on 2379
#proxy: bin/etcd --name infra-proxy1 --proxy=on --listen-client-urls http://127.0.0.1:2378 --initial-cluster 'infra1=http://127.0.0.1:12380,infra2=http://127.0.0.1:22380,infra3=http://127.0.0.1:32380' --enable-pprof

View File

@ -1,25 +0,0 @@
# etcd local-tester
The etcd local-tester runs a fault-injected cluster using local processes. It sets up an etcd cluster with unreliable network bridges on its peer and client interfaces. The cluster runs with a constant stream of `Put` requests to simulate client usage. A fault-injection script periodically kills cluster members and disrupts bridge connectivity.
# Requirements
local-tester depends on `goreman` to manage its processes and `bash` to run fault injection.
# Building
local-tester needs `etcd`, `benchmark`, and `bridge` binaries. To build these binaries, run the following from the etcd repository root:
```sh
./build
pushd tools/benchmark/ && go build && popd
pushd tools/local-tester/bridge && go build && popd
```
# Running
The fault injected cluster is invoked with `goreman`:
```sh
goreman -f tools/local-tester/Procfile start
```

View File

@ -1,16 +0,0 @@
#!/bin/sh
exec tools/local-tester/bridge/bridge \
-delay-accept \
-reset-listen \
-conn-fault-rate=0.25 \
-immediate-close \
-blackhole \
-time-close \
-write-remote-only \
-read-remote-only \
-random-blackhole \
-corrupt-receive \
-corrupt-send \
-reorder \
"$@"

View File

@ -1,320 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package main is the entry point for the local tester network bridge.
package main
import (
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"math/rand"
"net"
"sync"
"time"
)
type bridgeConn struct {
in net.Conn
out net.Conn
d dispatcher
}
func newBridgeConn(in net.Conn, d dispatcher) (*bridgeConn, error) {
out, err := net.Dial("tcp", flag.Args()[1])
if err != nil {
in.Close()
return nil, err
}
return &bridgeConn{in, out, d}, nil
}
func (b *bridgeConn) String() string {
return fmt.Sprintf("%v <-> %v", b.in.RemoteAddr(), b.out.RemoteAddr())
}
func (b *bridgeConn) Close() {
b.in.Close()
b.out.Close()
}
func bridge(b *bridgeConn) {
log.Println("bridging", b.String())
go b.d.Copy(b.out, makeFetch(b.in))
b.d.Copy(b.in, makeFetch(b.out))
}
func delayBridge(b *bridgeConn, txDelay, rxDelay time.Duration) {
go b.d.Copy(b.out, makeFetchDelay(makeFetch(b.in), txDelay))
b.d.Copy(b.in, makeFetchDelay(makeFetch(b.out), rxDelay))
}
func timeBridge(b *bridgeConn) {
go func() {
t := time.Duration(rand.Intn(5)+1) * time.Second
time.Sleep(t)
log.Printf("killing connection %s after %v\n", b.String(), t)
b.Close()
}()
bridge(b)
}
func blackhole(b *bridgeConn) {
log.Println("blackholing connection", b.String())
io.Copy(ioutil.Discard, b.in)
b.Close()
}
func readRemoteOnly(b *bridgeConn) {
log.Println("one way (<-)", b.String())
b.d.Copy(b.in, makeFetch(b.out))
}
func writeRemoteOnly(b *bridgeConn) {
log.Println("one way (->)", b.String())
b.d.Copy(b.out, makeFetch(b.in))
}
func corruptReceive(b *bridgeConn) {
log.Println("corruptReceive", b.String())
go b.d.Copy(b.in, makeFetchCorrupt(makeFetch(b.out)))
b.d.Copy(b.out, makeFetch(b.in))
}
func corruptSend(b *bridgeConn) {
log.Println("corruptSend", b.String())
go b.d.Copy(b.out, makeFetchCorrupt(makeFetch(b.in)))
b.d.Copy(b.in, makeFetch(b.out))
}
func makeFetch(c io.Reader) fetchFunc {
return func() ([]byte, error) {
b := make([]byte, 4096)
n, err := c.Read(b)
if err != nil {
return nil, err
}
return b[:n], nil
}
}
func makeFetchCorrupt(f func() ([]byte, error)) fetchFunc {
return func() ([]byte, error) {
b, err := f()
if err != nil {
return nil, err
}
// corrupt one byte approximately every 16K
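// (for a full 4096-byte read this gives roughly a 22% chance of at least
// one flipped byte: 1 - (1 - 1/16384)^4096 ≈ 0.22)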
for i := 0; i < len(b); i++ {
if rand.Intn(16*1024) == 0 {
b[i] = b[i] + 1
}
}
return b, nil
}
}
func makeFetchRand(f func() ([]byte, error)) fetchFunc {
return func() ([]byte, error) {
if rand.Intn(10) == 0 {
return nil, fmt.Errorf("fetchRand: done")
}
b, err := f()
if err != nil {
return nil, err
}
return b, nil
}
}
func makeFetchDelay(f fetchFunc, delay time.Duration) fetchFunc {
return func() ([]byte, error) {
b, err := f()
if err != nil {
return nil, err
}
time.Sleep(delay)
return b, nil
}
}
func randomBlackhole(b *bridgeConn) {
log.Println("random blackhole: connection", b.String())
var wg sync.WaitGroup
wg.Add(2)
go func() {
b.d.Copy(b.in, makeFetchRand(makeFetch(b.out)))
wg.Done()
}()
go func() {
b.d.Copy(b.out, makeFetchRand(makeFetch(b.in)))
wg.Done()
}()
wg.Wait()
b.Close()
}
type config struct {
delayAccept bool
resetListen bool
connFaultRate float64
immediateClose bool
blackhole bool
timeClose bool
writeRemoteOnly bool
readRemoteOnly bool
randomBlackhole bool
corruptSend bool
corruptReceive bool
reorder bool
txDelay string
rxDelay string
}
type acceptFaultFunc func()
type connFaultFunc func(*bridgeConn)
func main() {
var cfg config
flag.BoolVar(&cfg.delayAccept, "delay-accept", false, "delays accepting new connections")
flag.BoolVar(&cfg.resetListen, "reset-listen", false, "resets the listening port")
flag.Float64Var(&cfg.connFaultRate, "conn-fault-rate", 0.0, "rate of faulty connections")
flag.BoolVar(&cfg.immediateClose, "immediate-close", false, "close after accept")
flag.BoolVar(&cfg.blackhole, "blackhole", false, "reads nothing, writes go nowhere")
flag.BoolVar(&cfg.timeClose, "time-close", false, "close after random time")
flag.BoolVar(&cfg.writeRemoteOnly, "write-remote-only", false, "only write, no read")
flag.BoolVar(&cfg.readRemoteOnly, "read-remote-only", false, "only read, no write")
flag.BoolVar(&cfg.randomBlackhole, "random-blackhole", false, "blackhole after data xfer")
flag.BoolVar(&cfg.corruptReceive, "corrupt-receive", false, "corrupt packets received from destination")
flag.BoolVar(&cfg.corruptSend, "corrupt-send", false, "corrupt packets sent to destination")
flag.BoolVar(&cfg.reorder, "reorder", false, "reorder packet delivery")
flag.StringVar(&cfg.txDelay, "tx-delay", "0", "duration to delay client transmission to server")
flag.StringVar(&cfg.rxDelay, "rx-delay", "0", "duration to delay client receive from server")
flag.Parse()
lAddr := flag.Args()[0]
fwdAddr := flag.Args()[1]
log.Println("listening on ", lAddr)
log.Println("forwarding to ", fwdAddr)
l, err := net.Listen("tcp", lAddr)
if err != nil {
log.Fatal(err)
}
defer l.Close()
acceptFaults := []acceptFaultFunc{func() {}}
if cfg.delayAccept {
f := func() {
log.Println("delaying accept")
time.Sleep(3 * time.Second)
}
acceptFaults = append(acceptFaults, f)
}
if cfg.resetListen {
f := func() {
log.Println("reset listen port")
l.Close()
newListener, err := net.Listen("tcp", lAddr)
if err != nil {
log.Fatal(err)
}
l = newListener
}
acceptFaults = append(acceptFaults, f)
}
connFaults := []connFaultFunc{func(b *bridgeConn) { bridge(b) }}
if cfg.immediateClose {
f := func(b *bridgeConn) {
log.Printf("terminating connection %s immediately", b.String())
b.Close()
}
connFaults = append(connFaults, f)
}
if cfg.blackhole {
connFaults = append(connFaults, blackhole)
}
if cfg.timeClose {
connFaults = append(connFaults, timeBridge)
}
if cfg.writeRemoteOnly {
connFaults = append(connFaults, writeRemoteOnly)
}
if cfg.readRemoteOnly {
connFaults = append(connFaults, readRemoteOnly)
}
if cfg.randomBlackhole {
connFaults = append(connFaults, randomBlackhole)
}
if cfg.corruptSend {
connFaults = append(connFaults, corruptSend)
}
if cfg.corruptReceive {
connFaults = append(connFaults, corruptReceive)
}
txd, txdErr := time.ParseDuration(cfg.txDelay)
if txdErr != nil {
log.Fatal(txdErr)
}
rxd, rxdErr := time.ParseDuration(cfg.rxDelay)
if rxdErr != nil {
log.Fatal(rxdErr)
}
if txd != 0 || rxd != 0 {
f := func(b *bridgeConn) { delayBridge(b, txd, rxd) }
connFaults = append(connFaults, f)
}
if len(connFaults) > 1 && cfg.connFaultRate == 0 {
log.Fatal("connection faults defined but conn-fault-rate=0")
}
var disp dispatcher
if cfg.reorder {
disp = newDispatcherPool()
} else {
disp = newDispatcherImmediate()
}
for {
acceptFaults[rand.Intn(len(acceptFaults))]()
conn, err := l.Accept()
if err != nil {
log.Fatal(err)
}
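// connFaults[0] is the fault-free bridge; keep the randomly chosen
// fault only with probability connFaultRate.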
r := rand.Intn(len(connFaults))
if rand.Intn(100) >= int(100.0*cfg.connFaultRate) {
r = 0
}
bc, err := newBridgeConn(conn, disp)
if err != nil {
log.Printf("oops %v", err)
continue
}
go connFaults[r](bc)
}
}

View File

@ -1,140 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"io"
"math/rand"
"sync"
"time"
)
var (
// dispatchPoolDelay is the time to wait before flushing all buffered packets
dispatchPoolDelay = 100 * time.Millisecond
// dispatchPacketBytes is how many bytes to send until choosing a new connection
dispatchPacketBytes = 32
)
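// For example, a single 100-byte read queued through Copy is split into
// four packets (32+32+32+4); when flushed, those packets keep their order
// but may interleave with packets bound for other connections.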
type dispatcher interface {
// Copy works like io.Copy using buffers provided by fetchFunc
Copy(io.Writer, fetchFunc) error
}
type fetchFunc func() ([]byte, error)
type dispatcherPool struct {
// mu protects the dispatch packet queue 'q'
mu sync.Mutex
q []dispatchPacket
}
type dispatchPacket struct {
buf []byte
out io.Writer
}
func newDispatcherPool() dispatcher {
d := &dispatcherPool{}
go d.writeLoop()
return d
}
func (d *dispatcherPool) writeLoop() {
for {
time.Sleep(dispatchPoolDelay)
d.flush()
}
}
func (d *dispatcherPool) flush() {
d.mu.Lock()
pkts := d.q
d.q = nil
d.mu.Unlock()
if len(pkts) == 0 {
return
}
// group packets by socket; preserve the packet ordering within a socket
pktmap := make(map[io.Writer][]dispatchPacket)
outs := []io.Writer{}
for _, pkt := range pkts {
opkts, ok := pktmap[pkt.out]
if !ok {
outs = append(outs, pkt.out)
}
pktmap[pkt.out] = append(opkts, pkt)
}
// send all packets in pkts
for len(outs) != 0 {
// randomize writer on every write
r := rand.Intn(len(outs))
rpkts := pktmap[outs[r]]
rpkts[0].out.Write(rpkts[0].buf)
// dequeue packet
rpkts = rpkts[1:]
if len(rpkts) == 0 {
delete(pktmap, outs[r])
outs = append(outs[:r], outs[r+1:]...)
} else {
pktmap[outs[r]] = rpkts
}
}
}
func (d *dispatcherPool) Copy(w io.Writer, f fetchFunc) error {
for {
b, err := f()
if err != nil {
return err
}
pkts := []dispatchPacket{}
for len(b) > 0 {
pkt := b
if len(b) > dispatchPacketBytes {
pkt = pkt[:dispatchPacketBytes]
b = b[dispatchPacketBytes:]
} else {
b = nil
}
pkts = append(pkts, dispatchPacket{pkt, w})
}
d.mu.Lock()
d.q = append(d.q, pkts...)
d.mu.Unlock()
}
}
type dispatcherImmediate struct{}
func newDispatcherImmediate() dispatcher {
return &dispatcherImmediate{}
}
func (d *dispatcherImmediate) Copy(w io.Writer, f fetchFunc) error {
for {
b, err := f()
if err != nil {
return err
}
if _, err := w.Write(b); err != nil {
return err
}
}
}

View File

@ -1,108 +0,0 @@
#!/bin/bash
PROCFILE="tools/local-tester/Procfile"
HTTPFAIL=(127.0.0.1:11180 127.0.0.1:22280 127.0.0.1:33380)
function wait_time {
expr $RANDOM % 10 + 1
}
function cycle {
for a; do
echo "cycling $a"
goreman -f $PROCFILE run stop $a || echo "could not stop $a"
sleep `wait_time`s
goreman -f $PROCFILE run restart $a || echo "could not restart $a"
done
}
function cycle_members {
cycle etcd1 etcd2 etcd3
}
function cycle_pbridge {
cycle pbridge1 pbridge2 pbridge3
}
function cycle_cbridge {
cycle cbridge1 cbridge2 cbridge3
}
function cycle_stresser {
cycle stress-put
}
function kill_maj {
idx="etcd"`expr $RANDOM % 3 + 1`
idx2="$idx"
while [ "$idx" == "$idx2" ]; do
idx2="etcd"`expr $RANDOM % 3 + 1`
done
echo "kill majority $idx $idx2"
goreman -f $PROCFILE run stop $idx || echo "could not stop $idx"
goreman -f $PROCFILE run stop $idx2 || echo "could not stop $idx2"
sleep `wait_time`s
goreman -f $PROCFILE run restart $idx || echo "could not restart $idx"
goreman -f $PROCFILE run restart $idx2 || echo "could not restart $idx2"
}
function kill_all {
for a in etcd1 etcd2 etcd3; do
goreman -f $PROCFILE run stop $a || echo "could not stop $a"
done
sleep `wait_time`s
for a in etcd1 etcd2 etcd3; do
goreman -f $PROCFILE run restart $a || echo "could not restart $a"
done
}
function rand_fp {
echo "$FAILPOINTS" | sed `expr $RANDOM % $NUMFPS + 1`"q;d"
}
# fp_activate <http> <fppath> <value>
function fp_activate {
curl "$1"/"$2" -XPUT -d "$3" >/dev/null 2>&1
}
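# e.g. fp_activate 127.0.0.1:11180 pkg/myFailpoint 'panic("pkg/myFailpoint")'
# (the failpoint path above is illustrative; real paths come from $FAILPOINTS)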
function fp_rand_single {
fp=`rand_fp`
fp_activate ${HTTPFAIL[`expr $RANDOM % ${#HTTPFAIL[@]}`]} $fp 'panic("'$fp'")'
sleep `wait_time`s
}
function fp_rand_all {
fp=`rand_fp`
for a in `seq ${#HTTPFAIL[@]}`; do fp_activate ${HTTPFAIL[$a]} "$fp" 'panic("'$fp'")'; done
sleep `wait_time`s
}
function fp_all_rand_fire {
for fp in $FAILPOINTS; do
for url in "${HTTPFAIL[@]}"; do
fp_activate "$url" "$fp" '0.5%panic("0.5%'$fp'")'
done
done
}
function choose {
fault=${FAULTS[`expr $RANDOM % ${#FAULTS[@]}`]}
echo $fault
$fault || echo "failed: $fault"
}
sleep 2s
FAULTS=(cycle_members kill_maj kill_all cycle_pbridge cycle_cbridge cycle_stresser)
# add failpoint faults if available
FAILPOINTS=`curl http://"${HTTPFAIL[0]}" 2>/dev/null | cut -f1 -d'=' | grep -v "^$"`
NUMFPS=`echo $(echo "$FAILPOINTS" | wc -l)`
if [ "$NUMFPS" != "0" ]; then
FAULTS+=(fp_rand_single)
FAULTS+=(fp_rand_all)
fi
while true; do
choose
# start any nodes that have been killed by failpoints
for a in etcd1 etcd2 etcd3; do goreman -f $PROCFILE run start $a; done
fp_all_rand_fire
done