mirror of https://github.com/etcd-io/etcd.git (synced 2024-09-27 06:25:44 +00:00)

tools: remove

Signed-off-by: Gyuho Lee <gyuhox@gmail.com>

This commit is contained in:
parent dd9f05567d
commit 074e417770

tools/benchmark/.gitignore (vendored): 1 change
@@ -1 +0,0 @@
benchmark
@@ -1,16 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package cmd implements individual benchmark commands for the benchmark utility.
package cmd
@@ -1,85 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
	"context"
	"fmt"
	"time"

	v3 "github.com/coreos/etcd/clientv3"
	"github.com/coreos/etcd/pkg/report"

	"github.com/spf13/cobra"
	"gopkg.in/cheggaaa/pb.v1"
)

var leaseKeepaliveCmd = &cobra.Command{
	Use:   "lease-keepalive",
	Short: "Benchmark lease keepalive",

	Run: leaseKeepaliveFunc,
}

var (
	leaseKeepaliveTotal int
)

func init() {
	RootCmd.AddCommand(leaseKeepaliveCmd)
	leaseKeepaliveCmd.Flags().IntVar(&leaseKeepaliveTotal, "total", 10000, "Total number of lease keepalive requests")
}

func leaseKeepaliveFunc(cmd *cobra.Command, args []string) {
	requests := make(chan struct{})
	clients := mustCreateClients(totalClients, totalConns)

	bar = pb.New(leaseKeepaliveTotal)
	bar.Format("Bom !")
	bar.Start()

	r := newReport()
	for i := range clients {
		wg.Add(1)
		go func(c v3.Lease) {
			defer wg.Done()
			resp, err := c.Grant(context.Background(), 100)
			if err != nil {
				panic(err)
			}
			for range requests {
				st := time.Now()
				_, err := c.KeepAliveOnce(context.TODO(), resp.ID)
				r.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
				bar.Increment()
			}
		}(clients[i])
	}

	wg.Add(1)
	go func() {
		defer wg.Done()
		for i := 0; i < leaseKeepaliveTotal; i++ {
			requests <- struct{}{}
		}
		close(requests)
	}()

	rc := r.Run()
	wg.Wait()
	close(r.Results())
	bar.Finish()
	fmt.Printf("%s", <-rc)
}
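For context, a hypothetical invocation of this deleted subcommand; it assumes a `benchmark` binary built from this package, and the flags are the ones registered in the init function above plus the root command's persistent flags:

```
# sketch: one lease per client, then 10,000 keepalive requests spread over them
benchmark --endpoints=127.0.0.1:2379 --conns=10 --clients=100 \
  lease-keepalive --total=10000
```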
@@ -1,135 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
	"crypto/rand"
	"fmt"
	"os"
	"runtime/pprof"
	"time"

	"github.com/coreos/etcd/lease"
	"github.com/coreos/etcd/pkg/report"

	"github.com/spf13/cobra"
)

// mvccPutCmd represents a storage put performance benchmarking tool
var mvccPutCmd = &cobra.Command{
	Use:   "put",
	Short: "Benchmark put performance of storage",

	Run: mvccPutFunc,
}

var (
	mvccTotalRequests int
	storageKeySize    int
	valueSize         int
	txn               bool
	nrTxnOps          int
)

func init() {
	mvccCmd.AddCommand(mvccPutCmd)

	mvccPutCmd.Flags().IntVar(&mvccTotalRequests, "total", 100, "a total number of keys to put")
	mvccPutCmd.Flags().IntVar(&storageKeySize, "key-size", 64, "a size of key (Byte)")
	mvccPutCmd.Flags().IntVar(&valueSize, "value-size", 64, "a size of value (Byte)")
	mvccPutCmd.Flags().BoolVar(&txn, "txn", false, "put a key in transaction or not")
	mvccPutCmd.Flags().IntVar(&nrTxnOps, "txn-ops", 1, "a number of keys to put per transaction")

	// TODO: after the PR https://github.com/spf13/cobra/pull/220 is merged, the below pprof related flags should be moved to RootCmd
	mvccPutCmd.Flags().StringVar(&cpuProfPath, "cpuprofile", "", "the path of file for storing cpu profile result")
	mvccPutCmd.Flags().StringVar(&memProfPath, "memprofile", "", "the path of file for storing heap profile result")

}

func createBytesSlice(bytesN, sliceN int) [][]byte {
	rs := make([][]byte, sliceN)
	for i := range rs {
		rs[i] = make([]byte, bytesN)
		if _, err := rand.Read(rs[i]); err != nil {
			panic(err)
		}
	}
	return rs
}

func mvccPutFunc(cmd *cobra.Command, args []string) {
	if cpuProfPath != "" {
		f, err := os.Create(cpuProfPath)
		if err != nil {
			fmt.Fprintln(os.Stderr, "Failed to create a file for storing cpu profile result: ", err)
			os.Exit(1)
		}

		err = pprof.StartCPUProfile(f)
		if err != nil {
			fmt.Fprintln(os.Stderr, "Failed to start cpu profile: ", err)
			os.Exit(1)
		}
		defer pprof.StopCPUProfile()
	}

	if memProfPath != "" {
		f, err := os.Create(memProfPath)
		if err != nil {
			fmt.Fprintln(os.Stderr, "Failed to create a file for storing heap profile result: ", err)
			os.Exit(1)
		}

		defer func() {
			err := pprof.WriteHeapProfile(f)
			if err != nil {
				fmt.Fprintln(os.Stderr, "Failed to write heap profile result: ", err)
				// can do nothing for handling the error
			}
		}()
	}

	keys := createBytesSlice(storageKeySize, mvccTotalRequests*nrTxnOps)
	vals := createBytesSlice(valueSize, mvccTotalRequests*nrTxnOps)

	weight := float64(nrTxnOps)
	r := newWeightedReport()
	rrc := r.Results()

	rc := r.Run()

	if txn {
		for i := 0; i < mvccTotalRequests; i++ {
			st := time.Now()

			tw := s.Write()
			for j := i; j < i+nrTxnOps; j++ {
				tw.Put(keys[j], vals[j], lease.NoLease)
			}
			tw.End()

			rrc <- report.Result{Start: st, End: time.Now(), Weight: weight}
		}
	} else {
		for i := 0; i < mvccTotalRequests; i++ {
			st := time.Now()
			s.Put(keys[i], vals[i], lease.NoLease)
			rrc <- report.Result{Start: st, End: time.Now()}
		}
	}

	close(r.Results())
	fmt.Printf("%s", <-rc)
}
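A hypothetical invocation of this storage-level benchmark; it assumes a built `benchmark` binary, and combines the flags registered above with the persistent `--batch-*` flags that the parent mvcc command (later in this diff) registers:

```
# sketch: write keys straight into a local MVCC store, batched into txns of 100 ops
benchmark mvcc --batch-interval=100 --batch-limit=10000 \
  put --total=100000 --key-size=64 --value-size=64 --txn --txn-ops=100 \
  --cpuprofile=cpu.out
```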
@@ -1,61 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
	"os"
	"time"

	"github.com/coreos/etcd/lease"
	"github.com/coreos/etcd/mvcc"
	"github.com/coreos/etcd/mvcc/backend"
	"github.com/spf13/cobra"
)

var (
	batchInterval int
	batchLimit    int

	s mvcc.KV
)

func initMVCC() {
	bcfg := backend.DefaultBackendConfig()
	bcfg.Path, bcfg.BatchInterval, bcfg.BatchLimit = "mvcc-bench", time.Duration(batchInterval)*time.Millisecond, batchLimit
	be := backend.New(bcfg)
	s = mvcc.NewStore(be, &lease.FakeLessor{}, nil)
	os.Remove("mvcc-bench") // boltDB has an opened fd, so removing the file is ok
}

// mvccCmd represents the MVCC storage benchmarking tools
var mvccCmd = &cobra.Command{
	Use:   "mvcc",
	Short: "Benchmark mvcc",
	Long: `storage subcommand is a set of various benchmark tools for MVCC storage subsystem of etcd.
Actual benchmarks are implemented as its subcommands.`,

	PersistentPreRun: mvccPreRun,
}

func init() {
	RootCmd.AddCommand(mvccCmd)

	mvccCmd.PersistentFlags().IntVar(&batchInterval, "batch-interval", 100, "Interval of batching (milliseconds)")
	mvccCmd.PersistentFlags().IntVar(&batchLimit, "batch-limit", 10000, "A limit of batched transaction")
}

func mvccPreRun(cmd *cobra.Command, args []string) {
	initMVCC()
}
@@ -1,152 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
	"context"
	"encoding/binary"
	"fmt"
	"math"
	"math/rand"
	"os"
	"time"

	v3 "github.com/coreos/etcd/clientv3"
	"github.com/coreos/etcd/pkg/report"

	"github.com/spf13/cobra"
	"golang.org/x/time/rate"
	"gopkg.in/cheggaaa/pb.v1"
)

// putCmd represents the put command
var putCmd = &cobra.Command{
	Use:   "put",
	Short: "Benchmark put",

	Run: putFunc,
}

var (
	keySize int
	valSize int

	putTotal int
	putRate  int

	keySpaceSize int
	seqKeys      bool

	compactInterval   time.Duration
	compactIndexDelta int64
)

func init() {
	RootCmd.AddCommand(putCmd)
	putCmd.Flags().IntVar(&keySize, "key-size", 8, "Key size of put request")
	putCmd.Flags().IntVar(&valSize, "val-size", 8, "Value size of put request")
	putCmd.Flags().IntVar(&putRate, "rate", 0, "Maximum puts per second (0 is no limit)")

	putCmd.Flags().IntVar(&putTotal, "total", 10000, "Total number of put requests")
	putCmd.Flags().IntVar(&keySpaceSize, "key-space-size", 1, "Maximum possible keys")
	putCmd.Flags().BoolVar(&seqKeys, "sequential-keys", false, "Use sequential keys")
	putCmd.Flags().DurationVar(&compactInterval, "compact-interval", 0, `Interval to compact database (do not duplicate this with etcd's 'auto-compaction-retention' flag) (e.g. --compact-interval=5m compacts every 5-minute)`)
	putCmd.Flags().Int64Var(&compactIndexDelta, "compact-index-delta", 1000, "Delta between current revision and compact revision (e.g. current revision 10000, compact at 9000)")
}

func putFunc(cmd *cobra.Command, args []string) {
	if keySpaceSize <= 0 {
		fmt.Fprintf(os.Stderr, "expected positive --key-space-size, got (%v)", keySpaceSize)
		os.Exit(1)
	}

	requests := make(chan v3.Op, totalClients)
	if putRate == 0 {
		putRate = math.MaxInt32
	}
	limit := rate.NewLimiter(rate.Limit(putRate), 1)
	clients := mustCreateClients(totalClients, totalConns)
	k, v := make([]byte, keySize), string(mustRandBytes(valSize))

	bar = pb.New(putTotal)
	bar.Format("Bom !")
	bar.Start()

	r := newReport()
	for i := range clients {
		wg.Add(1)
		go func(c *v3.Client) {
			defer wg.Done()
			for op := range requests {
				limit.Wait(context.Background())

				st := time.Now()
				_, err := c.Do(context.Background(), op)
				r.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
				bar.Increment()
			}
		}(clients[i])
	}

	go func() {
		for i := 0; i < putTotal; i++ {
			if seqKeys {
				binary.PutVarint(k, int64(i%keySpaceSize))
			} else {
				binary.PutVarint(k, int64(rand.Intn(keySpaceSize)))
			}
			requests <- v3.OpPut(string(k), v)
		}
		close(requests)
	}()

	if compactInterval > 0 {
		go func() {
			for {
				time.Sleep(compactInterval)
				compactKV(clients)
			}
		}()
	}

	rc := r.Run()
	wg.Wait()
	close(r.Results())
	bar.Finish()
	fmt.Println(<-rc)
}

func compactKV(clients []*v3.Client) {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	resp, err := clients[0].KV.Get(ctx, "foo")
	cancel()
	if err != nil {
		panic(err)
	}
	revToCompact := max(0, resp.Header.Revision-compactIndexDelta)
	ctx, cancel = context.WithTimeout(context.Background(), 5*time.Second)
	_, err = clients[0].KV.Compact(ctx, revToCompact)
	cancel()
	if err != nil {
		panic(err)
	}
}

func max(n1, n2 int64) int64 {
	if n1 > n2 {
		return n1
	}
	return n2
}
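A hypothetical invocation of this put benchmark, assuming a built `benchmark` binary; all flags are the ones registered above or on the root command:

```
# sketch: sequential-key puts across 1,000 clients, rate-limited, compacting every 5 minutes
benchmark --endpoints=127.0.0.1:2379 --conns=100 --clients=1000 \
  put --total=100000 --key-size=8 --val-size=256 \
  --sequential-keys --key-space-size=100000 \
  --rate=5000 --compact-interval=5m --compact-index-delta=1000
```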
@@ -1,119 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
	"context"
	"fmt"
	"math"
	"os"
	"time"

	v3 "github.com/coreos/etcd/clientv3"
	"github.com/coreos/etcd/pkg/report"

	"github.com/spf13/cobra"
	"golang.org/x/time/rate"
	"gopkg.in/cheggaaa/pb.v1"
)

// rangeCmd represents the range command
var rangeCmd = &cobra.Command{
	Use:   "range key [end-range]",
	Short: "Benchmark range",

	Run: rangeFunc,
}

var (
	rangeRate        int
	rangeTotal       int
	rangeConsistency string
)

func init() {
	RootCmd.AddCommand(rangeCmd)
	rangeCmd.Flags().IntVar(&rangeRate, "rate", 0, "Maximum range requests per second (0 is no limit)")
	rangeCmd.Flags().IntVar(&rangeTotal, "total", 10000, "Total number of range requests")
	rangeCmd.Flags().StringVar(&rangeConsistency, "consistency", "l", "Linearizable(l) or Serializable(s)")
}

func rangeFunc(cmd *cobra.Command, args []string) {
	if len(args) == 0 || len(args) > 2 {
		fmt.Fprintln(os.Stderr, cmd.Usage())
		os.Exit(1)
	}

	k := args[0]
	end := ""
	if len(args) == 2 {
		end = args[1]
	}

	if rangeConsistency == "l" {
		fmt.Println("bench with linearizable range")
	} else if rangeConsistency == "s" {
		fmt.Println("bench with serializable range")
	} else {
		fmt.Fprintln(os.Stderr, cmd.Usage())
		os.Exit(1)
	}

	if rangeRate == 0 {
		rangeRate = math.MaxInt32
	}
	limit := rate.NewLimiter(rate.Limit(rangeRate), 1)

	requests := make(chan v3.Op, totalClients)
	clients := mustCreateClients(totalClients, totalConns)

	bar = pb.New(rangeTotal)
	bar.Format("Bom !")
	bar.Start()

	r := newReport()
	for i := range clients {
		wg.Add(1)
		go func(c *v3.Client) {
			defer wg.Done()
			for op := range requests {
				limit.Wait(context.Background())

				st := time.Now()
				_, err := c.Do(context.Background(), op)
				r.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
				bar.Increment()
			}
		}(clients[i])
	}

	go func() {
		for i := 0; i < rangeTotal; i++ {
			opts := []v3.OpOption{v3.WithRange(end)}
			if rangeConsistency == "s" {
				opts = append(opts, v3.WithSerializable())
			}
			op := v3.OpGet(k, opts...)
			requests <- op
		}
		close(requests)
	}()

	rc := r.Run()
	wg.Wait()
	close(r.Results())
	bar.Finish()
	fmt.Printf("%s", <-rc)
}
@@ -1,74 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
	"sync"
	"time"

	"github.com/coreos/etcd/pkg/transport"

	"github.com/spf13/cobra"
	"gopkg.in/cheggaaa/pb.v1"
)

// This represents the base command when called without any subcommands
var RootCmd = &cobra.Command{
	Use:   "benchmark",
	Short: "A low-level benchmark tool for etcd3",
	Long: `benchmark is a low-level benchmark tool for etcd3.
It uses gRPC client directly and does not depend on
etcd client library.
`,
}

var (
	endpoints    []string
	totalConns   uint
	totalClients uint
	precise      bool
	sample       bool

	bar *pb.ProgressBar
	wg  sync.WaitGroup

	tls transport.TLSInfo

	cpuProfPath string
	memProfPath string

	user string

	dialTimeout time.Duration

	targetLeader bool
)

func init() {
	RootCmd.PersistentFlags().StringSliceVar(&endpoints, "endpoints", []string{"127.0.0.1:2379"}, "gRPC endpoints")
	RootCmd.PersistentFlags().UintVar(&totalConns, "conns", 1, "Total number of gRPC connections")
	RootCmd.PersistentFlags().UintVar(&totalClients, "clients", 1, "Total number of gRPC clients")

	RootCmd.PersistentFlags().BoolVar(&precise, "precise", false, "use full floating point precision")
	RootCmd.PersistentFlags().BoolVar(&sample, "sample", false, "'true' to sample requests for every second")
	RootCmd.PersistentFlags().StringVar(&tls.CertFile, "cert", "", "identify HTTPS client using this SSL certificate file")
	RootCmd.PersistentFlags().StringVar(&tls.KeyFile, "key", "", "identify HTTPS client using this SSL key file")
	RootCmd.PersistentFlags().StringVar(&tls.CAFile, "cacert", "", "verify certificates of HTTPS-enabled servers using this CA bundle")

	RootCmd.PersistentFlags().StringVar(&user, "user", "", "provide username[:password] and prompt if password is not supplied.")
	RootCmd.PersistentFlags().DurationVar(&dialTimeout, "dial-timeout", 0, "dial timeout for client connections")

	RootCmd.PersistentFlags().BoolVar(&targetLeader, "target-leader", false, "connect only to the leader node")
}
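The persistent flags registered above apply to every subcommand. A hypothetical invocation, assuming a built `benchmark` binary; the certificate file names are placeholders:

```
# sketch: global connection, TLS, auth, and leader-targeting flags shared by all subcommands
benchmark --endpoints=10.0.0.1:2379,10.0.0.2:2379 \
  --cacert=ca.pem --cert=client.pem --key=client-key.pem \
  --user=root --dial-timeout=5s --target-leader \
  range foo
```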
@@ -1,208 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
	"context"
	"encoding/binary"
	"fmt"
	"math"
	"math/rand"
	"os"
	"time"

	v3 "github.com/coreos/etcd/clientv3"
	v3sync "github.com/coreos/etcd/clientv3/concurrency"
	"github.com/coreos/etcd/etcdserver/api/v3lock/v3lockpb"
	"github.com/coreos/etcd/pkg/report"

	"github.com/spf13/cobra"
	"golang.org/x/time/rate"
	"gopkg.in/cheggaaa/pb.v1"
)

// stmCmd represents the STM benchmark command
var stmCmd = &cobra.Command{
	Use:   "stm",
	Short: "Benchmark STM",

	Run: stmFunc,
}

type stmApply func(v3sync.STM) error

var (
	stmIsolation string
	stmIso       v3sync.Isolation

	stmTotal        int
	stmKeysPerTxn   int
	stmKeyCount     int
	stmValSize      int
	stmWritePercent int
	stmLocker       string
	stmRate         int
)

func init() {
	RootCmd.AddCommand(stmCmd)

	stmCmd.Flags().StringVar(&stmIsolation, "isolation", "r", "Read Committed (c), Repeatable Reads (r), Serializable (s), or Snapshot (ss)")
	stmCmd.Flags().IntVar(&stmKeyCount, "keys", 1, "Total unique keys accessible by the benchmark")
	stmCmd.Flags().IntVar(&stmTotal, "total", 10000, "Total number of completed STM transactions")
	stmCmd.Flags().IntVar(&stmKeysPerTxn, "keys-per-txn", 1, "Number of keys to access per transaction")
	stmCmd.Flags().IntVar(&stmWritePercent, "txn-wr-percent", 50, "Percentage of keys to overwrite per transaction")
	stmCmd.Flags().StringVar(&stmLocker, "stm-locker", "stm", "Wrap STM transaction with a custom locking mechanism (stm, lock-client, lock-rpc)")
	stmCmd.Flags().IntVar(&stmValSize, "val-size", 8, "Value size of each STM put request")
	stmCmd.Flags().IntVar(&stmRate, "rate", 0, "Maximum STM transactions per second (0 is no limit)")
}

func stmFunc(cmd *cobra.Command, args []string) {
	if stmKeyCount <= 0 {
		fmt.Fprintf(os.Stderr, "expected positive --keys, got (%v)", stmKeyCount)
		os.Exit(1)
	}

	if stmWritePercent < 0 || stmWritePercent > 100 {
		fmt.Fprintf(os.Stderr, "expected [0, 100] --txn-wr-percent, got (%v)", stmWritePercent)
		os.Exit(1)
	}

	if stmKeysPerTxn < 0 || stmKeysPerTxn > stmKeyCount {
		fmt.Fprintf(os.Stderr, "expected --keys-per-txn between 0 and %v, got (%v)", stmKeyCount, stmKeysPerTxn)
		os.Exit(1)
	}

	switch stmIsolation {
	case "c":
		stmIso = v3sync.ReadCommitted
	case "r":
		stmIso = v3sync.RepeatableReads
	case "s":
		stmIso = v3sync.Serializable
	case "ss":
		stmIso = v3sync.SerializableSnapshot
	default:
		fmt.Fprintln(os.Stderr, cmd.Usage())
		os.Exit(1)
	}

	if stmRate == 0 {
		stmRate = math.MaxInt32
	}
	limit := rate.NewLimiter(rate.Limit(stmRate), 1)

	requests := make(chan stmApply, totalClients)
	clients := mustCreateClients(totalClients, totalConns)

	bar = pb.New(stmTotal)
	bar.Format("Bom !")
	bar.Start()

	r := newReport()
	for i := range clients {
		wg.Add(1)
		go doSTM(clients[i], requests, r.Results())
	}

	go func() {
		for i := 0; i < stmTotal; i++ {
			kset := make(map[string]struct{})
			for len(kset) != stmKeysPerTxn {
				k := make([]byte, 16)
				binary.PutVarint(k, int64(rand.Intn(stmKeyCount)))
				s := string(k)
				kset[s] = struct{}{}
			}

			applyf := func(s v3sync.STM) error {
				limit.Wait(context.Background())
				wrs := int(float32(len(kset)*stmWritePercent) / 100.0)
				for k := range kset {
					s.Get(k)
					if wrs > 0 {
						s.Put(k, string(mustRandBytes(stmValSize)))
						wrs--
					}
				}
				return nil
			}

			requests <- applyf
		}
		close(requests)
	}()

	rc := r.Run()
	wg.Wait()
	close(r.Results())
	bar.Finish()
	fmt.Printf("%s", <-rc)
}

func doSTM(client *v3.Client, requests <-chan stmApply, results chan<- report.Result) {
	defer wg.Done()

	lock, unlock := func() error { return nil }, func() error { return nil }
	switch stmLocker {
	case "lock-client":
		s, err := v3sync.NewSession(client)
		if err != nil {
			panic(err)
		}
		defer s.Close()
		m := v3sync.NewMutex(s, "stmlock")
		lock = func() error { return m.Lock(context.TODO()) }
		unlock = func() error { return m.Unlock(context.TODO()) }
	case "lock-rpc":
		var lockKey []byte
		s, err := v3sync.NewSession(client)
		if err != nil {
			panic(err)
		}
		defer s.Close()
		lc := v3lockpb.NewLockClient(client.ActiveConnection())
		lock = func() error {
			req := &v3lockpb.LockRequest{Name: []byte("stmlock"), Lease: int64(s.Lease())}
			resp, err := lc.Lock(context.TODO(), req)
			if resp != nil {
				lockKey = resp.Key
			}
			return err
		}
		unlock = func() error {
			req := &v3lockpb.UnlockRequest{Key: lockKey}
			_, err := lc.Unlock(context.TODO(), req)
			return err
		}
	case "stm":
	default:
		fmt.Fprintf(os.Stderr, "unexpected stm locker %q\n", stmLocker)
		os.Exit(1)
	}

	for applyf := range requests {
		st := time.Now()
		if lerr := lock(); lerr != nil {
			panic(lerr)
		}
		_, err := v3sync.NewSTM(client, applyf, v3sync.WithIsolation(stmIso))
		if lerr := unlock(); lerr != nil {
			panic(lerr)
		}
		results <- report.Result{Err: err, Start: st, End: time.Now()}
		bar.Increment()
	}
}
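A hypothetical invocation of the STM benchmark, assuming a built `benchmark` binary and the flags registered above:

```
# sketch: 10,000 serializable STM transactions, 4 of 1,000 keys touched per txn, half overwritten
benchmark stm --isolation=s --keys=1000 --keys-per-txn=4 \
  --txn-wr-percent=50 --val-size=8 --rate=1000 --total=10000
```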
@@ -1,108 +0,0 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
	"context"
	"encoding/binary"
	"fmt"
	"math"
	"os"
	"time"

	v3 "github.com/coreos/etcd/clientv3"
	"github.com/coreos/etcd/pkg/report"

	"github.com/spf13/cobra"
	"golang.org/x/time/rate"
	"gopkg.in/cheggaaa/pb.v1"
)

// txnPutCmd represents the txnPut command
var txnPutCmd = &cobra.Command{
	Use:   "txn-put",
	Short: "Benchmark txn-put",

	Run: txnPutFunc,
}

var (
	txnPutTotal     int
	txnPutRate      int
	txnPutOpsPerTxn int
)

func init() {
	RootCmd.AddCommand(txnPutCmd)
	txnPutCmd.Flags().IntVar(&keySize, "key-size", 8, "Key size of txn put")
	txnPutCmd.Flags().IntVar(&valSize, "val-size", 8, "Value size of txn put")
	txnPutCmd.Flags().IntVar(&txnPutOpsPerTxn, "txn-ops", 1, "Number of puts per txn")
	txnPutCmd.Flags().IntVar(&txnPutRate, "rate", 0, "Maximum txns per second (0 is no limit)")

	txnPutCmd.Flags().IntVar(&txnPutTotal, "total", 10000, "Total number of txn requests")
	txnPutCmd.Flags().IntVar(&keySpaceSize, "key-space-size", 1, "Maximum possible keys")
}

func txnPutFunc(cmd *cobra.Command, args []string) {
	if keySpaceSize <= 0 {
		fmt.Fprintf(os.Stderr, "expected positive --key-space-size, got (%v)", keySpaceSize)
		os.Exit(1)
	}

	requests := make(chan []v3.Op, totalClients)
	if txnPutRate == 0 {
		txnPutRate = math.MaxInt32
	}
	limit := rate.NewLimiter(rate.Limit(txnPutRate), 1)
	clients := mustCreateClients(totalClients, totalConns)
	k, v := make([]byte, keySize), string(mustRandBytes(valSize))

	bar = pb.New(txnPutTotal)
	bar.Format("Bom !")
	bar.Start()

	r := newReport()
	for i := range clients {
		wg.Add(1)
		go func(c *v3.Client) {
			defer wg.Done()
			for ops := range requests {
				limit.Wait(context.Background())
				st := time.Now()
				_, err := c.Txn(context.TODO()).Then(ops...).Commit()
				r.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
				bar.Increment()
			}
		}(clients[i])
	}

	go func() {
		for i := 0; i < txnPutTotal; i++ {
			ops := make([]v3.Op, txnPutOpsPerTxn)
			for j := 0; j < txnPutOpsPerTxn; j++ {
				binary.PutVarint(k, int64(((i*txnPutOpsPerTxn)+j)%keySpaceSize))
				ops[j] = v3.OpPut(string(k), v)
			}
			requests <- ops
		}
		close(requests)
	}()

	rc := r.Run()
	wg.Wait()
	close(r.Results())
	bar.Finish()
	fmt.Println(<-rc)
}
@@ -1,179 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
	"context"
	"crypto/rand"
	"fmt"
	"os"
	"strings"

	"github.com/coreos/etcd/clientv3"
	"github.com/coreos/etcd/pkg/report"
	"google.golang.org/grpc/grpclog"
	"github.com/bgentry/speakeasy"
)

var (
	// dialTotal counts the number of mustCreateConn calls so that endpoint
	// connections can be handed out in round-robin order
	dialTotal int

	// leaderEps is a cache for holding endpoints of a leader node
	leaderEps []string

	// cache the username and password for multiple connections
	globalUserName string
	globalPassword string
)

func mustFindLeaderEndpoints(c *clientv3.Client) {
	resp, lerr := c.MemberList(context.TODO())
	if lerr != nil {
		fmt.Fprintf(os.Stderr, "failed to get a member list: %s\n", lerr)
		os.Exit(1)
	}

	leaderId := uint64(0)
	for _, ep := range c.Endpoints() {
		if sresp, serr := c.Status(context.TODO(), ep); serr == nil {
			leaderId = sresp.Leader
			break
		}
	}

	for _, m := range resp.Members {
		if m.ID == leaderId {
			leaderEps = m.ClientURLs
			return
		}
	}

	fmt.Fprintf(os.Stderr, "failed to find a leader endpoint\n")
	os.Exit(1)
}

func getUsernamePassword(usernameFlag string) (string, string, error) {
	if globalUserName != "" && globalPassword != "" {
		return globalUserName, globalPassword, nil
	}
	colon := strings.Index(usernameFlag, ":")
	if colon == -1 {
		// Prompt for the password.
		password, err := speakeasy.Ask("Password: ")
		if err != nil {
			return "", "", err
		}
		globalUserName = usernameFlag
		globalPassword = password
	} else {
		globalUserName = usernameFlag[:colon]
		globalPassword = usernameFlag[colon+1:]
	}
	return globalUserName, globalPassword, nil
}

func mustCreateConn() *clientv3.Client {
	connEndpoints := leaderEps
	if len(connEndpoints) == 0 {
		connEndpoints = []string{endpoints[dialTotal%len(endpoints)]}
		dialTotal++
	}
	cfg := clientv3.Config{
		Endpoints:   connEndpoints,
		DialTimeout: dialTimeout,
	}
	if !tls.Empty() {
		cfgtls, err := tls.ClientConfig()
		if err != nil {
			fmt.Fprintf(os.Stderr, "bad tls config: %v\n", err)
			os.Exit(1)
		}
		cfg.TLS = cfgtls
	}

	if len(user) != 0 {
		username, password, err := getUsernamePassword(user)
		if err != nil {
			fmt.Fprintf(os.Stderr, "bad user information: %s %v\n", user, err)
			os.Exit(1)
		}
		cfg.Username = username
		cfg.Password = password

	}

	client, err := clientv3.New(cfg)
	if targetLeader && len(leaderEps) == 0 {
		mustFindLeaderEndpoints(client)
		client.Close()
		return mustCreateConn()
	}

	clientv3.SetLogger(grpclog.NewLoggerV2(os.Stderr, os.Stderr, os.Stderr))

	if err != nil {
		fmt.Fprintf(os.Stderr, "dial error: %v\n", err)
		os.Exit(1)
	}

	return client
}

func mustCreateClients(totalClients, totalConns uint) []*clientv3.Client {
	conns := make([]*clientv3.Client, totalConns)
	for i := range conns {
		conns[i] = mustCreateConn()
	}

	clients := make([]*clientv3.Client, totalClients)
	for i := range clients {
		clients[i] = conns[i%int(totalConns)]
	}
	return clients
}

func mustRandBytes(n int) []byte {
	rb := make([]byte, n)
	_, err := rand.Read(rb)
	if err != nil {
		fmt.Fprintf(os.Stderr, "failed to generate value: %v\n", err)
		os.Exit(1)
	}
	return rb
}

func newReport() report.Report {
	p := "%4.4f"
	if precise {
		p = "%g"
	}
	if sample {
		return report.NewReportSample(p)
	}
	return report.NewReport(p)
}

func newWeightedReport() report.Report {
	p := "%4.4f"
	if precise {
		p = "%g"
	}
	if sample {
		return report.NewReportSample(p)
	}
	return report.NewWeightedReport(report.NewReport(p), p)
}
@@ -1,247 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
	"context"
	"encoding/binary"
	"fmt"
	"math/rand"
	"os"
	"sync/atomic"
	"time"

	"github.com/coreos/etcd/clientv3"
	"github.com/coreos/etcd/pkg/report"

	"github.com/spf13/cobra"
	"golang.org/x/time/rate"
	"gopkg.in/cheggaaa/pb.v1"
)

// watchCmd represents the watch command
var watchCmd = &cobra.Command{
	Use:   "watch",
	Short: "Benchmark watch",
	Long: `Benchmark watch tests the performance of processing watch requests and
sending events to watchers. It tests the sending performance by
changing the value of the watched keys with concurrent put
requests.

During the test, each watcher watches (--total/--watchers) keys

(a watcher might watch on the same key multiple times if
--watched-key-total is small).

Each key is watched by (--total/--watched-key-total) watchers.
`,
	Run: watchFunc,
}

var (
	watchStreams          int
	watchWatchesPerStream int
	watchedKeyTotal       int

	watchPutRate  int
	watchPutTotal int

	watchKeySize      int
	watchKeySpaceSize int
	watchSeqKeys      bool
)

type watchedKeys struct {
	watched     []string
	numWatchers map[string]int

	watches []clientv3.WatchChan

	// ctx to control all watches
	ctx    context.Context
	cancel context.CancelFunc
}

func init() {
	RootCmd.AddCommand(watchCmd)
	watchCmd.Flags().IntVar(&watchStreams, "streams", 10, "Total watch streams")
	watchCmd.Flags().IntVar(&watchWatchesPerStream, "watch-per-stream", 100, "Total watchers per stream")
	watchCmd.Flags().IntVar(&watchedKeyTotal, "watched-key-total", 1, "Total number of keys to be watched")

	watchCmd.Flags().IntVar(&watchPutRate, "put-rate", 0, "Number of keys to put per second")
	watchCmd.Flags().IntVar(&watchPutTotal, "put-total", 1000, "Number of put requests")

	watchCmd.Flags().IntVar(&watchKeySize, "key-size", 32, "Key size of watch request")
	watchCmd.Flags().IntVar(&watchKeySpaceSize, "key-space-size", 1, "Maximum possible keys")
	watchCmd.Flags().BoolVar(&watchSeqKeys, "sequential-keys", false, "Use sequential keys")
}

func watchFunc(cmd *cobra.Command, args []string) {
	if watchKeySpaceSize <= 0 {
		fmt.Fprintf(os.Stderr, "expected positive --key-space-size, got (%v)", watchKeySpaceSize)
		os.Exit(1)
	}
	grpcConns := int(totalClients)
	if totalClients > totalConns {
		grpcConns = int(totalConns)
	}
	wantedConns := 1 + (watchStreams / 100)
	if grpcConns < wantedConns {
		fmt.Fprintf(os.Stderr, "warning: grpc limits 100 streams per client connection, have %d but need %d\n", grpcConns, wantedConns)
	}
	clients := mustCreateClients(totalClients, totalConns)
	wk := newWatchedKeys()
	benchMakeWatches(clients, wk)
	benchPutWatches(clients, wk)
}

func benchMakeWatches(clients []*clientv3.Client, wk *watchedKeys) {
	streams := make([]clientv3.Watcher, watchStreams)
	for i := range streams {
		streams[i] = clientv3.NewWatcher(clients[i%len(clients)])
	}

	keyc := make(chan string, watchStreams)
	bar = pb.New(watchStreams * watchWatchesPerStream)
	bar.Format("Bom !")
	bar.Start()

	r := newReport()
	rch := r.Results()

	wg.Add(len(streams) + 1)
	wc := make(chan []clientv3.WatchChan, len(streams))
	for _, s := range streams {
		go func(s clientv3.Watcher) {
			defer wg.Done()
			var ws []clientv3.WatchChan
			for i := 0; i < watchWatchesPerStream; i++ {
				k := <-keyc
				st := time.Now()
				wch := s.Watch(wk.ctx, k)
				rch <- report.Result{Start: st, End: time.Now()}
				ws = append(ws, wch)
				bar.Increment()
			}
			wc <- ws
		}(s)
	}
	go func() {
		defer func() {
			close(keyc)
			wg.Done()
		}()
		for i := 0; i < watchStreams*watchWatchesPerStream; i++ {
			key := wk.watched[i%len(wk.watched)]
			keyc <- key
			wk.numWatchers[key]++
		}
	}()

	rc := r.Run()
	wg.Wait()
	bar.Finish()
	close(r.Results())
	fmt.Printf("Watch creation summary:\n%s", <-rc)

	for i := 0; i < len(streams); i++ {
		wk.watches = append(wk.watches, (<-wc)...)
	}
}

func newWatchedKeys() *watchedKeys {
	watched := make([]string, watchedKeyTotal)
	for i := range watched {
		k := make([]byte, watchKeySize)
		if watchSeqKeys {
			binary.PutVarint(k, int64(i%watchKeySpaceSize))
		} else {
			binary.PutVarint(k, int64(rand.Intn(watchKeySpaceSize)))
		}
		watched[i] = string(k)
	}
	ctx, cancel := context.WithCancel(context.TODO())
	return &watchedKeys{
		watched:     watched,
		numWatchers: make(map[string]int),
		ctx:         ctx,
		cancel:      cancel,
	}
}

func benchPutWatches(clients []*clientv3.Client, wk *watchedKeys) {
	eventsTotal := 0
	for i := 0; i < watchPutTotal; i++ {
		eventsTotal += wk.numWatchers[wk.watched[i%len(wk.watched)]]
	}

	bar = pb.New(eventsTotal)
	bar.Format("Bom !")
	bar.Start()

	r := newReport()

	wg.Add(len(wk.watches))
	nrRxed := int32(eventsTotal)
	for _, w := range wk.watches {
		go func(wc clientv3.WatchChan) {
			defer wg.Done()
			recvWatchChan(wc, r.Results(), &nrRxed)
			wk.cancel()
		}(w)
	}

	putreqc := make(chan clientv3.Op, len(clients))
	go func() {
		defer close(putreqc)
		for i := 0; i < watchPutTotal; i++ {
			putreqc <- clientv3.OpPut(wk.watched[i%(len(wk.watched))], "data")
		}
	}()

	limit := rate.NewLimiter(rate.Limit(watchPutRate), 1)
	for _, cc := range clients {
		go func(c *clientv3.Client) {
			for op := range putreqc {
				if err := limit.Wait(context.TODO()); err != nil {
					panic(err)
				}
				if _, err := c.Do(context.TODO(), op); err != nil {
					panic(err)
				}
			}
		}(cc)
	}

	rc := r.Run()
	wg.Wait()
	bar.Finish()
	close(r.Results())
	fmt.Printf("Watch events received summary:\n%s", <-rc)

}

func recvWatchChan(wch clientv3.WatchChan, results chan<- report.Result, nrRxed *int32) {
	for r := range wch {
		st := time.Now()
		for range r.Events {
			results <- report.Result{Start: st, End: time.Now()}
			bar.Increment()
			if atomic.AddInt32(nrRxed, -1) <= 0 {
				return
			}
		}
	}
}
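A hypothetical invocation of the watch benchmark, assuming a built `benchmark` binary; flag names and defaults come from the init function above:

```
# sketch: 10 streams x 100 watchers over 100 distinct keys, then fan events out with 1,000 puts
benchmark --conns=10 --clients=10 watch \
  --streams=10 --watch-per-stream=100 --watched-key-total=100 \
  --put-total=1000 --put-rate=100 --key-size=32 --key-space-size=100
```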
@@ -1,118 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
	"context"
	"fmt"
	"sync"
	"time"

	v3 "github.com/coreos/etcd/clientv3"
	"github.com/coreos/etcd/pkg/report"

	"github.com/spf13/cobra"
	"gopkg.in/cheggaaa/pb.v1"
)

// watchGetCmd represents the watch command
var watchGetCmd = &cobra.Command{
	Use:   "watch-get",
	Short: "Benchmark watch with get",
	Long:  `Benchmark for serialized key gets with many unsynced watchers`,
	Run:   watchGetFunc,
}

var (
	watchGetTotalWatchers int
	watchGetTotalStreams  int
	watchEvents           int
	firstWatch            sync.Once
)

func init() {
	RootCmd.AddCommand(watchGetCmd)
	watchGetCmd.Flags().IntVar(&watchGetTotalWatchers, "watchers", 10000, "Total number of watchers")
	watchGetCmd.Flags().IntVar(&watchGetTotalStreams, "streams", 1, "Total number of watcher streams")
	watchGetCmd.Flags().IntVar(&watchEvents, "events", 8, "Number of events per watcher")
}

func watchGetFunc(cmd *cobra.Command, args []string) {
	clients := mustCreateClients(totalClients, totalConns)
	getClient := mustCreateClients(1, 1)

	// setup keys for watchers
	watchRev := int64(0)
	for i := 0; i < watchEvents; i++ {
		v := fmt.Sprintf("%d", i)
		resp, err := clients[0].Put(context.TODO(), "watchkey", v)
		if err != nil {
			panic(err)
		}
		if i == 0 {
			watchRev = resp.Header.Revision
		}
	}

	streams := make([]v3.Watcher, watchGetTotalStreams)
	for i := range streams {
		streams[i] = v3.NewWatcher(clients[i%len(clients)])
	}

	bar = pb.New(watchGetTotalWatchers * watchEvents)
	bar.Format("Bom !")
	bar.Start()

	// report from trying to do serialized gets with concurrent watchers
	r := newReport()
	ctx, cancel := context.WithCancel(context.TODO())
	f := func() {
		defer close(r.Results())
		for {
			st := time.Now()
			_, err := getClient[0].Get(ctx, "abc", v3.WithSerializable())
			if ctx.Err() != nil {
				break
			}
			r.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
		}
	}

	wg.Add(watchGetTotalWatchers)
	for i := 0; i < watchGetTotalWatchers; i++ {
		go doUnsyncWatch(streams[i%len(streams)], watchRev, f)
	}

	rc := r.Run()
	wg.Wait()
	cancel()
	bar.Finish()
	fmt.Printf("Get during watch summary:\n%s", <-rc)
}

func doUnsyncWatch(stream v3.Watcher, rev int64, f func()) {
	defer wg.Done()
	wch := stream.Watch(context.TODO(), "watchkey", v3.WithRev(rev))
	if wch == nil {
		panic("could not open watch channel")
	}
	firstWatch.Do(func() { go f() })
	i := 0
	for i < watchEvents {
		wev := <-wch
		i += len(wev.Events)
		bar.Add(len(wev.Events))
	}
}
@@ -1,111 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
	"context"
	"fmt"
	"os"
	"sync"
	"time"

	"github.com/coreos/etcd/clientv3"
	"github.com/coreos/etcd/pkg/report"

	"github.com/spf13/cobra"
	"golang.org/x/time/rate"
	"gopkg.in/cheggaaa/pb.v1"
)

// watchLatencyCmd represents the watch latency command
var watchLatencyCmd = &cobra.Command{
	Use:   "watch-latency",
	Short: "Benchmark watch latency",
	Long: `Benchmarks the latency for watches by measuring
the latency between writing to a key and receiving the
associated watch response.`,
	Run: watchLatencyFunc,
}

var (
	watchLTotal     int
	watchLPutRate   int
	watchLKeySize   int
	watchLValueSize int
)

func init() {
	RootCmd.AddCommand(watchLatencyCmd)
	watchLatencyCmd.Flags().IntVar(&watchLTotal, "total", 10000, "Total number of put requests")
	watchLatencyCmd.Flags().IntVar(&watchLPutRate, "put-rate", 100, "Number of keys to put per second")
	watchLatencyCmd.Flags().IntVar(&watchLKeySize, "key-size", 32, "Key size of watch response")
	watchLatencyCmd.Flags().IntVar(&watchLValueSize, "val-size", 32, "Value size of watch response")
}

func watchLatencyFunc(cmd *cobra.Command, args []string) {
	key := string(mustRandBytes(watchLKeySize))
	value := string(mustRandBytes(watchLValueSize))

	clients := mustCreateClients(totalClients, totalConns)
	putClient := mustCreateConn()

	wchs := make([]clientv3.WatchChan, len(clients))
	for i := range wchs {
		wchs[i] = clients[i].Watch(context.TODO(), key)
	}

	bar = pb.New(watchLTotal)
	bar.Format("Bom !")
	bar.Start()

	limiter := rate.NewLimiter(rate.Limit(watchLPutRate), watchLPutRate)
	r := newReport()
	rc := r.Run()

	for i := 0; i < watchLTotal; i++ {
		// limit key put as per reqRate
		if err := limiter.Wait(context.TODO()); err != nil {
			break
		}

		var st time.Time
		var wg sync.WaitGroup
		wg.Add(len(clients))
		barrierc := make(chan struct{})
		for _, wch := range wchs {
			ch := wch
			go func() {
				<-barrierc
				<-ch
				r.Results() <- report.Result{Start: st, End: time.Now()}
				wg.Done()
			}()
		}

		if _, err := putClient.Put(context.TODO(), key, value); err != nil {
			fmt.Fprintf(os.Stderr, "Failed to Put for watch latency benchmark: %v\n", err)
			os.Exit(1)
		}

		st = time.Now()
		close(barrierc)
		wg.Wait()
		bar.Increment()
	}

	close(r.Results())
	bar.Finish()
	fmt.Printf("%s", <-rc)
}
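A hypothetical invocation of the watch-latency benchmark, assuming a built `benchmark` binary and the flags registered above:

```
# sketch: measure put-to-event latency for a single watched key at 100 puts/sec
benchmark --conns=10 --clients=10 watch-latency \
  --total=10000 --put-rate=100 --key-size=32 --val-size=32
```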
@@ -1,16 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// benchmark is a program for benchmarking etcd v3 API performance.
package main
@@ -1,29 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
	"fmt"
	"os"

	"github.com/coreos/etcd/tools/benchmark/cmd"
)

func main() {
	if err := cmd.RootCmd.Execute(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(-1)
	}
}
@@ -1,74 +0,0 @@
### etcd-dump-db

etcd-dump-db inspects etcd db files.

```
Usage:
  etcd-dump-db [command]

Available Commands:
  list-bucket    bucket lists all buckets.
  iterate-bucket iterate-bucket lists key-value pairs in reverse order.
  hash           hash computes the hash of db file.

Flags:
  -h, --help[=false]: help for etcd-dump-db

Use "etcd-dump-db [command] --help" for more information about a command.
```

#### list-bucket [data dir or db file path]

Lists all buckets.

```
$ etcd-dump-db list-bucket agent01/agent.etcd

alarm
auth
authRoles
authUsers
cluster
key
lease
members
members_removed
meta
```

#### hash [data dir or db file path]

Computes the hash of db file.

```
$ etcd-dump-db hash agent01/agent.etcd
db path: agent01/agent.etcd/member/snap/db
Hash: 3700260467

$ etcd-dump-db hash agent02/agent.etcd
db path: agent02/agent.etcd/member/snap/db
Hash: 3700260467

$ etcd-dump-db hash agent03/agent.etcd
db path: agent03/agent.etcd/member/snap/db
Hash: 3700260467
```

#### iterate-bucket [data dir or db file path]

Lists key-value pairs in reverse order.

```
$ etcd-dump-db iterate-bucket agent03/agent.etcd --bucket=key --limit 3

key="\x00\x00\x00\x00\x005@x_\x00\x00\x00\x00\x00\x00\x00\tt", value="\n\x153640412599896088633_9"
key="\x00\x00\x00\x00\x005@x_\x00\x00\x00\x00\x00\x00\x00\bt", value="\n\x153640412599896088633_8"
key="\x00\x00\x00\x00\x005@x_\x00\x00\x00\x00\x00\x00\x00\at", value="\n\x153640412599896088633_7"
```
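The examples above pass an agent data directory; per the usage string ("data dir or db file path"), pointing directly at the backend file should work the same way. A hypothetical example, using the db path layout printed by the hash command:

```
$ etcd-dump-db list-bucket agent01/agent.etcd/member/snap/db
```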
@ -1,83 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
bolt "github.com/coreos/bbolt"
|
||||
"github.com/coreos/etcd/mvcc"
|
||||
"github.com/coreos/etcd/mvcc/backend"
|
||||
)
|
||||
|
||||
func snapDir(dataDir string) string {
|
||||
return filepath.Join(dataDir, "member", "snap")
|
||||
}
|
||||
|
||||
func getBuckets(dbPath string) (buckets []string, err error) {
|
||||
db, derr := bolt.Open(dbPath, 0600, &bolt.Options{})
|
||||
if derr != nil {
|
||||
return nil, derr
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
err = db.View(func(tx *bolt.Tx) error {
|
||||
return tx.ForEach(func(b []byte, _ *bolt.Bucket) error {
|
||||
buckets = append(buckets, string(b))
|
||||
return nil
|
||||
})
|
||||
})
|
||||
return buckets, err
|
||||
}
|
||||
|
||||
func iterateBucket(dbPath, bucket string, limit uint64) (err error) {
|
||||
db, derr := bolt.Open(dbPath, 0600, &bolt.Options{})
|
||||
if derr != nil {
|
||||
return derr
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
err = db.View(func(tx *bolt.Tx) error {
|
||||
b := tx.Bucket([]byte(bucket))
|
||||
if b == nil {
|
||||
return fmt.Errorf("got nil bucket for %s", bucket)
|
||||
}
|
||||
|
||||
c := b.Cursor()
|
||||
|
||||
// iterate in reverse order (use First() and Next() for ascending order)
|
||||
for k, v := c.Last(); k != nil; k, v = c.Prev() {
|
||||
fmt.Printf("key=%q, value=%q\n", k, v)
|
||||
|
||||
limit--
|
||||
if limit == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
func getHash(dbPath string) (hash uint32, err error) {
|
||||
b := backend.NewDefaultBackend(dbPath)
|
||||
return b.Hash(mvcc.DefaultIgnores)
|
||||
}
|
||||
|
||||
// TODO: revert by revision and find specified hash value
|
||||
// currently, it's hard because lease is in a separate bucket
|
||||
// and does not modify revision
|
@ -1,16 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// etcd-dump-db inspects etcd db files.
|
||||
package main
|
@ -1,130 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
var (
|
||||
rootCommand = &cobra.Command{
|
||||
Use: "etcd-dump-db",
|
||||
Short: "etcd-dump-db inspects etcd db files.",
|
||||
}
|
||||
listBucketCommand = &cobra.Command{
|
||||
Use: "list-bucket [data dir or db file path]",
|
||||
Short: "bucket lists all buckets.",
|
||||
Run: listBucketCommandFunc,
|
||||
}
|
||||
iterateBucketCommand = &cobra.Command{
|
||||
Use: "iterate-bucket [data dir or db file path]",
|
||||
Short: "iterate-bucket lists key-value pairs in reverse order.",
|
||||
Run: iterateBucketCommandFunc,
|
||||
}
|
||||
getHashCommand = &cobra.Command{
|
||||
Use: "hash [data dir or db file path]",
|
||||
Short: "hash computes the hash of db file.",
|
||||
Run: getHashCommandFunc,
|
||||
}
|
||||
)
|
||||
|
||||
var (
|
||||
iterateBucketName string
|
||||
iterateBucketLimit uint64
|
||||
)
|
||||
|
||||
func init() {
|
||||
iterateBucketCommand.PersistentFlags().StringVar(&iterateBucketName, "bucket", "", "bucket name to iterate")
|
||||
iterateBucketCommand.PersistentFlags().Uint64Var(&iterateBucketLimit, "limit", 0, "max number of key-value pairs to iterate (0 to iterate all)")
|
||||
|
||||
rootCommand.AddCommand(listBucketCommand)
|
||||
rootCommand.AddCommand(iterateBucketCommand)
|
||||
rootCommand.AddCommand(getHashCommand)
|
||||
}
|
||||
|
||||
func main() {
|
||||
if err := rootCommand.Execute(); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func listBucketCommandFunc(cmd *cobra.Command, args []string) {
|
||||
if len(args) < 1 {
|
||||
log.Fatalf("Must provide at least 1 argument (got %v)", args)
|
||||
}
|
||||
dp := args[0]
|
||||
if !strings.HasSuffix(dp, "db") {
|
||||
dp = filepath.Join(snapDir(dp), "db")
|
||||
}
|
||||
if !existFileOrDir(dp) {
|
||||
log.Fatalf("%q does not exist", dp)
|
||||
}
|
||||
|
||||
bts, err := getBuckets(dp)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
for _, b := range bts {
|
||||
fmt.Println(b)
|
||||
}
|
||||
}
|
||||
|
||||
func iterateBucketCommandFunc(cmd *cobra.Command, args []string) {
|
||||
if len(args) < 1 {
|
||||
log.Fatalf("Must provide at least 1 argument (got %v)", args)
|
||||
}
|
||||
dp := args[0]
|
||||
if !strings.HasSuffix(dp, "db") {
|
||||
dp = filepath.Join(snapDir(dp), "db")
|
||||
}
|
||||
if !existFileOrDir(dp) {
|
||||
log.Fatalf("%q does not exist", dp)
|
||||
}
|
||||
|
||||
if iterateBucketName == "" {
|
||||
log.Fatal("got empty bucket name")
|
||||
}
|
||||
|
||||
err := iterateBucket(dp, iterateBucketName, iterateBucketLimit)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func getHashCommandFunc(cmd *cobra.Command, args []string) {
|
||||
if len(args) < 1 {
|
||||
log.Fatalf("Must provide at least 1 argument (got %v)", args)
|
||||
}
|
||||
dp := args[0]
|
||||
if !strings.HasSuffix(dp, "db") {
|
||||
dp = filepath.Join(snapDir(dp), "db")
|
||||
}
|
||||
if !existFileOrDir(dp) {
|
||||
log.Fatalf("%q does not exist", dp)
|
||||
}
|
||||
|
||||
hash, err := getHash(dp)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
fmt.Printf("db path: %s\nHash: %d\n", dp, hash)
|
||||
}
|
@ -1,22 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import "os"
|
||||
|
||||
func existFileOrDir(name string) bool {
|
||||
_, err := os.Stat(name)
|
||||
return err == nil
|
||||
}
|
@ -1,16 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// etcd-dump-logs is a program for analyzing etcd server write ahead logs.
|
||||
package main
|
@ -1,162 +0,0 @@
|
||||
// Copyright 2015 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/coreos/etcd/etcdserver/etcdserverpb"
|
||||
"github.com/coreos/etcd/pkg/pbutil"
|
||||
"github.com/coreos/etcd/pkg/types"
|
||||
"github.com/coreos/etcd/raft/raftpb"
|
||||
"github.com/coreos/etcd/snap"
|
||||
"github.com/coreos/etcd/wal"
|
||||
"github.com/coreos/etcd/wal/walpb"
|
||||
)
|
||||
|
||||
func main() {
|
||||
from := flag.String("data-dir", "", "")
|
||||
snapfile := flag.String("start-snap", "", "The base name of snapshot file to start dumping")
|
||||
index := flag.Uint64("start-index", 0, "The index to start dumping")
|
||||
flag.Parse()
|
||||
if *from == "" {
|
||||
log.Fatal("Must provide -data-dir flag.")
|
||||
}
|
||||
if *snapfile != "" && *index != 0 {
|
||||
log.Fatal("start-snap and start-index flags cannot be used together.")
|
||||
}
|
||||
|
||||
var (
|
||||
walsnap walpb.Snapshot
|
||||
snapshot *raftpb.Snapshot
|
||||
err error
|
||||
)
|
||||
|
||||
isIndex := *index != 0
|
||||
|
||||
if isIndex {
|
||||
fmt.Printf("Start dumping log entries from index %d.\n", *index)
|
||||
walsnap.Index = *index
|
||||
} else {
|
||||
if *snapfile == "" {
|
||||
ss := snap.New(snapDir(*from))
|
||||
snapshot, err = ss.Load()
|
||||
} else {
|
||||
snapshot, err = snap.Read(filepath.Join(snapDir(*from), *snapfile))
|
||||
}
|
||||
|
||||
switch err {
|
||||
case nil:
|
||||
walsnap.Index, walsnap.Term = snapshot.Metadata.Index, snapshot.Metadata.Term
|
||||
nodes := genIDSlice(snapshot.Metadata.ConfState.Nodes)
|
||||
fmt.Printf("Snapshot:\nterm=%d index=%d nodes=%s\n",
|
||||
walsnap.Term, walsnap.Index, nodes)
|
||||
case snap.ErrNoSnapshot:
|
||||
fmt.Printf("Snapshot:\nempty\n")
|
||||
default:
|
||||
log.Fatalf("Failed loading snapshot: %v", err)
|
||||
}
|
||||
fmt.Println("Start dumping log entries from snapshot.")
|
||||
}
|
||||
|
||||
w, err := wal.OpenForRead(walDir(*from), walsnap)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed opening WAL: %v", err)
|
||||
}
|
||||
wmetadata, state, ents, err := w.ReadAll()
|
||||
w.Close()
|
||||
if err != nil && (!isIndex || err != wal.ErrSnapshotNotFound) {
|
||||
log.Fatalf("Failed reading WAL: %v", err)
|
||||
}
|
||||
id, cid := parseWALMetadata(wmetadata)
|
||||
vid := types.ID(state.Vote)
|
||||
fmt.Printf("WAL metadata:\nnodeID=%s clusterID=%s term=%d commitIndex=%d vote=%s\n",
|
||||
id, cid, state.Term, state.Commit, vid)
|
||||
|
||||
fmt.Printf("WAL entries:\n")
|
||||
fmt.Printf("lastIndex=%d\n", ents[len(ents)-1].Index)
|
||||
fmt.Printf("%4s\t%10s\ttype\tdata\n", "term", "index")
|
||||
for _, e := range ents {
|
||||
msg := fmt.Sprintf("%4d\t%10d", e.Term, e.Index)
|
||||
switch e.Type {
|
||||
case raftpb.EntryNormal:
|
||||
msg = fmt.Sprintf("%s\tnorm", msg)
|
||||
|
||||
var rr etcdserverpb.InternalRaftRequest
|
||||
if err := rr.Unmarshal(e.Data); err == nil {
|
||||
msg = fmt.Sprintf("%s\t%s", msg, rr.String())
|
||||
break
|
||||
}
|
||||
|
||||
var r etcdserverpb.Request
|
||||
if err := r.Unmarshal(e.Data); err == nil {
|
||||
switch r.Method {
|
||||
case "":
|
||||
msg = fmt.Sprintf("%s\tnoop", msg)
|
||||
case "SYNC":
|
||||
msg = fmt.Sprintf("%s\tmethod=SYNC time=%q", msg, time.Unix(0, r.Time))
|
||||
case "QGET", "DELETE":
|
||||
msg = fmt.Sprintf("%s\tmethod=%s path=%s", msg, r.Method, excerpt(r.Path, 64, 64))
|
||||
default:
|
||||
msg = fmt.Sprintf("%s\tmethod=%s path=%s val=%s", msg, r.Method, excerpt(r.Path, 64, 64), excerpt(r.Val, 128, 0))
|
||||
}
|
||||
break
|
||||
}
|
||||
msg = fmt.Sprintf("%s\t???", msg)
|
||||
case raftpb.EntryConfChange:
|
||||
msg = fmt.Sprintf("%s\tconf", msg)
|
||||
var r raftpb.ConfChange
|
||||
if err := r.Unmarshal(e.Data); err != nil {
|
||||
msg = fmt.Sprintf("%s\t???", msg)
|
||||
} else {
|
||||
msg = fmt.Sprintf("%s\tmethod=%s id=%s", msg, r.Type, types.ID(r.NodeID))
|
||||
}
|
||||
}
|
||||
fmt.Println(msg)
|
||||
}
|
||||
}
|
||||
|
||||
func walDir(dataDir string) string { return filepath.Join(dataDir, "member", "wal") }
|
||||
|
||||
func snapDir(dataDir string) string { return filepath.Join(dataDir, "member", "snap") }
|
||||
|
||||
func parseWALMetadata(b []byte) (id, cid types.ID) {
|
||||
var metadata etcdserverpb.Metadata
|
||||
pbutil.MustUnmarshal(&metadata, b)
|
||||
id = types.ID(metadata.NodeID)
|
||||
cid = types.ID(metadata.ClusterID)
|
||||
return id, cid
|
||||
}
|
||||
|
||||
func genIDSlice(a []uint64) []types.ID {
|
||||
ids := make([]types.ID, len(a))
|
||||
for i, id := range a {
|
||||
ids[i] = types.ID(id)
|
||||
}
|
||||
return ids
|
||||
}
|
||||
|
||||
// excerpt replaces middle part with ellipsis and returns a double-quoted
|
||||
// string safely escaped with Go syntax.
|
||||
func excerpt(str string, pre, suf int) string {
|
||||
if pre+suf > len(str) {
|
||||
return fmt.Sprintf("%q", str)
|
||||
}
|
||||
return fmt.Sprintf("%q...%q", str[:pre], str[len(str)-suf:])
|
||||
}
|
@ -1,47 +0,0 @@
|
||||
# etcd functional test suite
|
||||
|
||||
etcd functional test suite tests the functionality of an etcd cluster with a focus on failure resistance under high pressure. It sets up an etcd cluster and injects failures into the cluster by killing a member process or isolating its network. It expects the etcd cluster to recover within a short amount of time after the fault is fixed.
|
||||
|
||||
etcd functional test suite has two components: etcd-agent and etcd-tester. etcd-agent runs on every test machine, while etcd-tester is the single controller of the test. etcd-tester drives all the etcd-agents to start etcd clusters and to simulate various failure cases.
|
||||
|
||||
## requirements
|
||||
|
||||
The environment of the cluster must be stable enough that the test suite can assume most observed failures are the ones it injected itself.
|
||||
|
||||
## etcd agent
|
||||
|
||||
etcd agent is a daemon on each test machine. It can start, stop, restart, isolate, and terminate an etcd process. The agent exposes this functionality via HTTP RPC.
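
For illustration, a controller can drive the agent with Go's standard `net/rpc` package. The sketch below is not part of the test suite; the endpoint address and etcd flags are illustrative assumptions, while the RPC method names (`Agent.RPCStart`, `Agent.RPCStop`, `Agent.RPCRestart`) match the agent's handlers.

```go
package main

import (
	"fmt"
	"log"
	"net/rpc"
)

func main() {
	// The agent serves net/rpc over HTTP; :9027 is its default port.
	c, err := rpc.DialHTTP("tcp", "127.0.0.1:9027")
	if err != nil {
		log.Fatal(err)
	}

	// Ask the agent to start an etcd member; the agent appends --data-dir itself.
	var pid int
	if err := c.Call("Agent.RPCStart", []string{"--name", "infra1"}, &pid); err != nil {
		log.Fatal(err)
	}
	fmt.Println("started etcd, pid:", pid)

	// Simulate a failure by stopping the member, then bring it back.
	if err := c.Call("Agent.RPCStop", struct{}{}, nil); err != nil {
		log.Fatal(err)
	}
	if err := c.Call("Agent.RPCRestart", struct{}{}, &pid); err != nil {
		log.Fatal(err)
	}
	fmt.Println("restarted etcd, pid:", pid)
}
```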
|
||||
|
||||
## etcd tester
|
||||
|
||||
etcd functional tester controls the progress of the functional tests. It calls the RPCs of the etcd agents to simulate various test cases. For example, it can start a three-member cluster by sending three start RPC calls to three different etcd agents, and it can fail one of the members by sending a stop RPC call to that member's agent.
|
||||
|
||||
### Run locally
|
||||
|
||||
```
|
||||
$ PASSES=functional ./test
|
||||
```
|
||||
|
||||
### Run with Docker
|
||||
|
||||
To run with Docker, first build the tester image:
|
||||
|
||||
```bash
|
||||
pushd ../..
|
||||
|
||||
GO_VERSION=1.9.3 \
|
||||
make build-docker-functional-tester \
|
||||
-f ./hack/scripts-dev/Makefile
|
||||
|
||||
popd
|
||||
```
|
||||
|
||||
Then run the [example scripts](./scripts):
|
||||
|
||||
```bash
|
||||
./scripts/agent-1.sh
|
||||
./scripts/agent-2.sh
|
||||
./scripts/agent-3.sh
|
||||
|
||||
./scripts/tester-limit.sh
|
||||
```
|
@ -1,11 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
if ! [[ "$0" =~ "tools/functional-tester/build" ]]; then
|
||||
echo "must be run from repository root"
|
||||
exit 255
|
||||
fi
|
||||
|
||||
CGO_ENABLED=0 go build -a -installsuffix cgo -ldflags "-s" -o bin/etcd-agent ./cmd/tools/functional-tester/etcd-agent
|
||||
CGO_ENABLED=0 go build -a -installsuffix cgo -ldflags "-s" -o bin/etcd-tester ./cmd/tools/functional-tester/etcd-tester
|
||||
CGO_ENABLED=0 go build -a -installsuffix cgo -ldflags "-s" -o bin/etcd-runner ./cmd/tools/functional-tester/etcd-runner
|
||||
|
@ -1,372 +0,0 @@
|
||||
// Copyright 2015 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/coreos/etcd/pkg/fileutil"
|
||||
"github.com/coreos/etcd/pkg/transport"
|
||||
"github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
|
||||
)
|
||||
|
||||
const (
|
||||
stateUninitialized = "uninitialized"
|
||||
stateStarted = "started"
|
||||
stateStopped = "stopped"
|
||||
stateTerminated = "terminated"
|
||||
)
|
||||
|
||||
type Agent struct {
|
||||
state string // the state of etcd process
|
||||
|
||||
cmd *exec.Cmd
|
||||
logfile *os.File
|
||||
|
||||
cfg AgentConfig
|
||||
|
||||
pmu sync.Mutex
|
||||
advertisePortToProxy map[int]transport.Proxy
|
||||
}
|
||||
|
||||
type AgentConfig struct {
|
||||
EtcdPath string
|
||||
LogDir string
|
||||
FailpointAddr string
|
||||
}
|
||||
|
||||
func newAgent(cfg AgentConfig) (*Agent, error) {
|
||||
// check if the file exists
|
||||
_, err := os.Stat(cfg.EtcdPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
c := exec.Command(cfg.EtcdPath)
|
||||
|
||||
err = fileutil.TouchDirAll(cfg.LogDir)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var f *os.File
|
||||
f, err = os.Create(filepath.Join(cfg.LogDir, "etcd.log"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &Agent{
|
||||
state: stateUninitialized,
|
||||
cmd: c,
|
||||
logfile: f,
|
||||
cfg: cfg,
|
||||
advertisePortToProxy: make(map[int]transport.Proxy),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// start starts a new etcd process with the given args.
|
||||
func (a *Agent) start(args ...string) error {
|
||||
args = append(args, "--data-dir", a.dataDir())
|
||||
a.cmd = exec.Command(a.cmd.Path, args...)
|
||||
a.cmd.Env = []string{"GOFAIL_HTTP=" + a.cfg.FailpointAddr}
|
||||
a.cmd.Stdout = a.logfile
|
||||
a.cmd.Stderr = a.logfile
|
||||
err := a.cmd.Start()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
a.state = stateStarted
|
||||
|
||||
a.pmu.Lock()
|
||||
defer a.pmu.Unlock()
|
||||
if len(a.advertisePortToProxy) == 0 {
|
||||
// give etcd enough time to start before setting up the proxy
|
||||
time.Sleep(time.Second)
|
||||
var (
|
||||
err error
|
||||
s string
|
||||
listenClientURL *url.URL
|
||||
advertiseClientURL *url.URL
|
||||
advertiseClientURLPort int
|
||||
listenPeerURL *url.URL
|
||||
advertisePeerURL *url.URL
|
||||
advertisePeerURLPort int
|
||||
)
|
||||
for i := range args {
|
||||
switch args[i] {
|
||||
case "--listen-client-urls":
|
||||
listenClientURL, err = url.Parse(args[i+1])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case "--advertise-client-urls":
|
||||
advertiseClientURL, err = url.Parse(args[i+1])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, s, err = net.SplitHostPort(advertiseClientURL.Host)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
advertiseClientURLPort, err = strconv.Atoi(s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case "--listen-peer-urls":
|
||||
listenPeerURL, err = url.Parse(args[i+1])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case "--initial-advertise-peer-urls":
|
||||
advertisePeerURL, err = url.Parse(args[i+1])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, s, err = net.SplitHostPort(advertisePeerURL.Host)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
advertisePeerURLPort, err = strconv.Atoi(s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
clientProxy := transport.NewProxy(transport.ProxyConfig{
|
||||
From: *advertiseClientURL,
|
||||
To: *listenClientURL,
|
||||
})
|
||||
select {
|
||||
case err = <-clientProxy.Error():
|
||||
return err
|
||||
case <-time.After(time.Second):
|
||||
}
|
||||
a.advertisePortToProxy[advertiseClientURLPort] = clientProxy
|
||||
|
||||
peerProxy := transport.NewProxy(transport.ProxyConfig{
|
||||
From: *advertisePeerURL,
|
||||
To: *listenPeerURL,
|
||||
})
|
||||
select {
|
||||
case err = <-peerProxy.Error():
|
||||
return err
|
||||
case <-time.After(time.Second):
|
||||
}
|
||||
a.advertisePortToProxy[advertisePeerURLPort] = peerProxy
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// stopWithSig stops the etcd process the agent started, sending it the given signal.
|
||||
func (a *Agent) stopWithSig(sig os.Signal) error {
|
||||
if a.state != stateStarted {
|
||||
return nil
|
||||
}
|
||||
|
||||
a.pmu.Lock()
|
||||
if len(a.advertisePortToProxy) > 0 {
|
||||
for _, p := range a.advertisePortToProxy {
|
||||
if err := p.Close(); err != nil {
|
||||
a.pmu.Unlock()
|
||||
return err
|
||||
}
|
||||
select {
|
||||
case <-p.Done():
|
||||
// enough time to release port
|
||||
time.Sleep(time.Second)
|
||||
case <-time.After(time.Second):
|
||||
}
|
||||
}
|
||||
a.advertisePortToProxy = make(map[int]transport.Proxy)
|
||||
}
|
||||
a.pmu.Unlock()
|
||||
|
||||
err := stopWithSig(a.cmd, sig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
a.state = stateStopped
|
||||
return nil
|
||||
}
|
||||
|
||||
func stopWithSig(cmd *exec.Cmd, sig os.Signal) error {
|
||||
err := cmd.Process.Signal(sig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
errc := make(chan error)
|
||||
go func() {
|
||||
_, ew := cmd.Process.Wait()
|
||||
errc <- ew
|
||||
close(errc)
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-time.After(5 * time.Second):
|
||||
cmd.Process.Kill()
|
||||
case e := <-errc:
|
||||
return e
|
||||
}
|
||||
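// the 5-second timeout fired and the process was killed; wait for the pending Wait() result so it is reaped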
err = <-errc
|
||||
return err
|
||||
}
|
||||
|
||||
// restart restarts the stopped etcd process.
|
||||
func (a *Agent) restart() error {
|
||||
return a.start(a.cmd.Args[1:]...)
|
||||
}
|
||||
|
||||
func (a *Agent) cleanup() error {
|
||||
// exit with stacktrace
|
||||
if err := a.stopWithSig(syscall.SIGQUIT); err != nil {
|
||||
return err
|
||||
}
|
||||
a.state = stateUninitialized
|
||||
|
||||
a.logfile.Close()
|
||||
if err := archiveLogAndDataDir(a.cfg.LogDir, a.dataDir()); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := fileutil.TouchDirAll(a.cfg.LogDir); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
f, err := os.Create(filepath.Join(a.cfg.LogDir, "etcd.log"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
a.logfile = f
|
||||
|
||||
// https://www.kernel.org/doc/Documentation/sysctl/vm.txt
|
||||
// https://github.com/torvalds/linux/blob/master/fs/drop_caches.c
|
||||
cmd := exec.Command("/bin/sh", "-c", `echo "echo 1 > /proc/sys/vm/drop_caches" | sudo sh`)
|
||||
if err := cmd.Run(); err != nil {
|
||||
plog.Infof("error when cleaning page cache (%v)", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// terminate stops the existing etcd process the agent started
|
||||
// and removes the data dir.
|
||||
func (a *Agent) terminate() error {
|
||||
err := a.stopWithSig(syscall.SIGTERM)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = os.RemoveAll(a.dataDir())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
a.state = stateTerminated
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *Agent) dropPort(port int) error {
|
||||
a.pmu.Lock()
|
||||
defer a.pmu.Unlock()
|
||||
|
||||
p, ok := a.advertisePortToProxy[port]
|
||||
if !ok {
|
||||
return fmt.Errorf("%d does not have proxy", port)
|
||||
}
|
||||
p.BlackholeTx()
|
||||
p.BlackholeRx()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *Agent) recoverPort(port int) error {
|
||||
a.pmu.Lock()
|
||||
defer a.pmu.Unlock()
|
||||
|
||||
p, ok := a.advertisePortToProxy[port]
|
||||
if !ok {
|
||||
return fmt.Errorf("%d does not have proxy", port)
|
||||
}
|
||||
p.UnblackholeTx()
|
||||
p.UnblackholeRx()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *Agent) setLatency(ms, rv int) error {
|
||||
a.pmu.Lock()
|
||||
defer a.pmu.Unlock()
|
||||
|
||||
if ms == 0 {
|
||||
for _, p := range a.advertisePortToProxy {
|
||||
p.UndelayTx()
|
||||
p.UndelayRx()
|
||||
}
|
||||
return nil
}
|
||||
for _, p := range a.advertisePortToProxy {
|
||||
p.DelayTx(time.Duration(ms)*time.Millisecond, time.Duration(rv)*time.Millisecond)
|
||||
p.DelayRx(time.Duration(ms)*time.Millisecond, time.Duration(rv)*time.Millisecond)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *Agent) status() client.Status {
|
||||
return client.Status{State: a.state}
|
||||
}
|
||||
|
||||
func (a *Agent) dataDir() string {
|
||||
return filepath.Join(a.cfg.LogDir, "etcd.data")
|
||||
}
|
||||
|
||||
func existDir(fpath string) bool {
|
||||
st, err := os.Stat(fpath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return false
|
||||
}
|
||||
} else {
|
||||
return st.IsDir()
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func archiveLogAndDataDir(logDir string, datadir string) error {
|
||||
dir := filepath.Join(logDir, "failure_archive", time.Now().Format(time.RFC3339))
|
||||
if existDir(dir) {
|
||||
dir = filepath.Join(logDir, "failure_archive", time.Now().Add(time.Second).Format(time.RFC3339))
|
||||
}
|
||||
if err := fileutil.TouchDirAll(dir); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.Rename(filepath.Join(logDir, "etcd.log"), filepath.Join(dir, "etcd.log")); err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := os.Rename(datadir, filepath.Join(dir, filepath.Base(datadir))); err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
@ -1,87 +0,0 @@
|
||||
// Copyright 2015 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"testing"
|
||||
)
|
||||
|
||||
var etcdPath = filepath.Join(os.Getenv("GOPATH"), "bin/etcd")
|
||||
|
||||
func TestAgentStart(t *testing.T) {
|
||||
defer os.Remove("etcd.log")
|
||||
|
||||
a := newTestAgent(t)
|
||||
defer a.terminate()
|
||||
|
||||
err := a.start()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentRestart(t *testing.T) {
|
||||
defer os.Remove("etcd.log")
|
||||
|
||||
a := newTestAgent(t)
|
||||
defer a.terminate()
|
||||
|
||||
err := a.start()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
err = a.stopWithSig(syscall.SIGTERM)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = a.restart()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgentTerminate(t *testing.T) {
|
||||
defer os.Remove("etcd.log")
|
||||
|
||||
a := newTestAgent(t)
|
||||
|
||||
err := a.start()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
err = a.terminate()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, err := os.Stat(a.dataDir()); !os.IsNotExist(err) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// newTestAgent creates a test agent
|
||||
func newTestAgent(t *testing.T) *Agent {
|
||||
a, err := newAgent(AgentConfig{EtcdPath: etcdPath, LogDir: "etcd.log"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
return a
|
||||
}
|
@ -1,118 +0,0 @@
|
||||
// Copyright 2015 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package client
|
||||
|
||||
import "net/rpc"
|
||||
|
||||
type Status struct {
|
||||
// State gives the human-readable status of an agent (e.g., "started" or "terminated")
|
||||
State string
|
||||
|
||||
// TODO: gather more information
|
||||
// TODO: memory usage, raft information, etc..
|
||||
}
|
||||
|
||||
type Agent interface {
|
||||
ID() uint64
|
||||
// Start starts a new etcd with the given args on the agent machine.
|
||||
Start(args ...string) (int, error)
|
||||
// Stop stops the existing etcd the agent started.
|
||||
Stop() error
|
||||
// Restart restarts the existing etcd the agent stopped.
|
||||
Restart() (int, error)
|
||||
// Cleanup stops the existing etcd the agent started, then archives the log and its data dir.
|
||||
Cleanup() error
|
||||
// Terminate stops the existing etcd the agent started and removes its data dir.
|
||||
Terminate() error
|
||||
// DropPort drops all network packets at the given port.
|
||||
DropPort(port int) error
|
||||
// RecoverPort stops dropping all network packets at the given port.
|
||||
RecoverPort(port int) error
|
||||
// SetLatency slows down network by introducing latency.
|
||||
SetLatency(ms, rv int) error
|
||||
// RemoveLatency removes latency introduced by SetLatency.
|
||||
RemoveLatency() error
|
||||
// Status returns the status of etcd on the agent
|
||||
Status() (Status, error)
|
||||
}
|
||||
|
||||
type agent struct {
|
||||
endpoint string
|
||||
rpcClient *rpc.Client
|
||||
}
|
||||
|
||||
func NewAgent(endpoint string) (Agent, error) {
|
||||
c, err := rpc.DialHTTP("tcp", endpoint)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &agent{endpoint, c}, nil
|
||||
}
|
||||
|
||||
func (a *agent) Start(args ...string) (int, error) {
|
||||
var pid int
|
||||
err := a.rpcClient.Call("Agent.RPCStart", args, &pid)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
return pid, nil
|
||||
}
|
||||
|
||||
func (a *agent) Stop() error {
|
||||
return a.rpcClient.Call("Agent.RPCStop", struct{}{}, nil)
|
||||
}
|
||||
|
||||
func (a *agent) Restart() (int, error) {
|
||||
var pid int
|
||||
err := a.rpcClient.Call("Agent.RPCRestart", struct{}{}, &pid)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
return pid, nil
|
||||
}
|
||||
|
||||
func (a *agent) Cleanup() error {
|
||||
return a.rpcClient.Call("Agent.RPCCleanup", struct{}{}, nil)
|
||||
}
|
||||
|
||||
func (a *agent) Terminate() error {
|
||||
return a.rpcClient.Call("Agent.RPCTerminate", struct{}{}, nil)
|
||||
}
|
||||
|
||||
func (a *agent) DropPort(port int) error {
|
||||
return a.rpcClient.Call("Agent.RPCDropPort", port, nil)
|
||||
}
|
||||
|
||||
func (a *agent) RecoverPort(port int) error {
|
||||
return a.rpcClient.Call("Agent.RPCRecoverPort", port, nil)
|
||||
}
|
||||
|
||||
func (a *agent) SetLatency(ms, rv int) error {
|
||||
return a.rpcClient.Call("Agent.RPCSetLatency", []int{ms, rv}, nil)
|
||||
}
|
||||
|
||||
func (a *agent) RemoveLatency() error {
|
||||
return a.rpcClient.Call("Agent.RPCRemoveLatency", struct{}{}, nil)
|
||||
}
|
||||
|
||||
func (a *agent) Status() (Status, error) {
|
||||
var s Status
|
||||
err := a.rpcClient.Call("Agent.RPCStatus", struct{}{}, &s)
|
||||
return s, err
|
||||
}
|
||||
|
||||
func (a *agent) ID() uint64 {
|
||||
panic("not implemented")
|
||||
}
|
@ -1,16 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package client provides a client implementation to control an etcd-agent.
|
||||
package client
|
@ -1,16 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// etcd-agent is a daemon for controlling an etcd process via HTTP RPC.
|
||||
package main
|
@ -1,47 +0,0 @@
|
||||
// Copyright 2015 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/coreos/pkg/capnslog"
|
||||
)
|
||||
|
||||
var plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "etcd-agent")
|
||||
|
||||
func main() {
|
||||
etcdPath := flag.String("etcd-path", filepath.Join(os.Getenv("GOPATH"), "bin/etcd"), "the path to etcd binary")
|
||||
etcdLogDir := flag.String("etcd-log-dir", "etcd-log", "directory to store etcd logs, data directories, failure archive")
|
||||
port := flag.String("port", ":9027", "port to serve agent server")
|
||||
failpointAddr := flag.String("failpoint-addr", ":2381", "interface for gofail's HTTP server")
|
||||
flag.Parse()
|
||||
|
||||
cfg := AgentConfig{
|
||||
EtcdPath: *etcdPath,
|
||||
LogDir: *etcdLogDir,
|
||||
FailpointAddr: *failpointAddr,
|
||||
}
|
||||
a, err := newAgent(cfg)
|
||||
if err != nil {
|
||||
plog.Fatal(err)
|
||||
}
|
||||
a.serveRPC(*port)
|
||||
|
||||
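// receiving from a nil channel blocks forever; the agent keeps serving RPC requests until killed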
var done chan struct{}
|
||||
<-done
|
||||
}
|
@ -1,131 +0,0 @@
|
||||
// Copyright 2015 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/rpc"
|
||||
"syscall"
|
||||
|
||||
"github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
|
||||
)
|
||||
|
||||
func (a *Agent) serveRPC(port string) {
|
||||
rpc.Register(a)
|
||||
rpc.HandleHTTP()
|
||||
l, e := net.Listen("tcp", port)
|
||||
if e != nil {
|
||||
plog.Fatal(e)
|
||||
}
|
||||
plog.Println("agent listening on", port)
|
||||
go http.Serve(l, nil)
|
||||
}
|
||||
|
||||
func (a *Agent) RPCStart(args []string, pid *int) error {
|
||||
plog.Printf("start etcd with args %v", args)
|
||||
err := a.start(args...)
|
||||
if err != nil {
|
||||
plog.Println("error starting etcd", err)
|
||||
return err
|
||||
}
|
||||
*pid = a.cmd.Process.Pid
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *Agent) RPCStop(args struct{}, reply *struct{}) error {
|
||||
plog.Printf("stop etcd")
|
||||
err := a.stopWithSig(syscall.SIGTERM)
|
||||
if err != nil {
|
||||
plog.Println("error stopping etcd", err)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *Agent) RPCRestart(args struct{}, pid *int) error {
|
||||
plog.Printf("restart etcd")
|
||||
err := a.restart()
|
||||
if err != nil {
|
||||
plog.Println("error restarting etcd", err)
|
||||
return err
|
||||
}
|
||||
*pid = a.cmd.Process.Pid
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *Agent) RPCCleanup(args struct{}, reply *struct{}) error {
|
||||
plog.Printf("cleanup etcd")
|
||||
err := a.cleanup()
|
||||
if err != nil {
|
||||
plog.Println("error cleaning up etcd", err)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *Agent) RPCTerminate(args struct{}, reply *struct{}) error {
|
||||
plog.Printf("terminate etcd")
|
||||
err := a.terminate()
|
||||
if err != nil {
|
||||
plog.Println("error terminating etcd", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *Agent) RPCDropPort(port int, reply *struct{}) error {
|
||||
plog.Printf("drop port %d", port)
|
||||
err := a.dropPort(port)
|
||||
if err != nil {
|
||||
plog.Println("error dropping port", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *Agent) RPCRecoverPort(port int, reply *struct{}) error {
|
||||
plog.Printf("recover port %d", port)
|
||||
err := a.recoverPort(port)
|
||||
if err != nil {
|
||||
plog.Println("error recovering port", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *Agent) RPCSetLatency(args []int, reply *struct{}) error {
|
||||
if len(args) != 2 {
|
||||
return fmt.Errorf("SetLatency needs two args, got (%v)", args)
|
||||
}
|
||||
plog.Printf("set latency of %dms (+/- %dms)", args[0], args[1])
|
||||
err := a.setLatency(args[0], args[1])
|
||||
if err != nil {
|
||||
plog.Println("error setting latency", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *Agent) RPCRemoveLatency(args struct{}, reply *struct{}) error {
|
||||
plog.Println("removing latency")
|
||||
err := a.setLatency(0, 0)
|
||||
if err != nil {
|
||||
plog.Println("error removing latency", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (a *Agent) RPCStatus(args struct{}, status *client.Status) error {
|
||||
*status = a.status()
|
||||
return nil
|
||||
}
|
@ -1,166 +0,0 @@
|
||||
// Copyright 2015 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"net/rpc"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
|
||||
)
|
||||
|
||||
func init() {
|
||||
defaultAgent, err := newAgent(AgentConfig{EtcdPath: etcdPath, LogDir: "etcd.log"})
|
||||
if err != nil {
|
||||
log.Panic(err)
|
||||
}
|
||||
defaultAgent.serveRPC(":9027")
|
||||
}
|
||||
|
||||
func TestRPCStart(t *testing.T) {
|
||||
c, err := rpc.DialHTTP("tcp", ":9027")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
dir, err := ioutil.TempDir(os.TempDir(), "etcd-agent")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
var pid int
|
||||
err = c.Call("Agent.RPCStart", []string{"--data-dir", dir}, &pid)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer c.Call("Agent.RPCTerminate", struct{}{}, nil)
|
||||
|
||||
_, err = os.FindProcess(pid)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %v when finding process %d", err, pid)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRPCRestart(t *testing.T) {
|
||||
c, err := rpc.DialHTTP("tcp", ":9027")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
dir, err := ioutil.TempDir(os.TempDir(), "etcd-agent")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
var pid int
|
||||
err = c.Call("Agent.RPCStart", []string{"--data-dir", dir}, &pid)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer c.Call("Agent.RPCTerminate", struct{}{}, nil)
|
||||
|
||||
err = c.Call("Agent.RPCStop", struct{}{}, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
var npid int
|
||||
err = c.Call("Agent.RPCRestart", struct{}{}, &npid)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if npid == pid {
|
||||
t.Errorf("pid = %v, want not equal to %d", npid, pid)
|
||||
}
|
||||
|
||||
s, err := os.FindProcess(pid)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %v when finding process %d", err, pid)
|
||||
}
|
||||
_, err = s.Wait()
|
||||
if err == nil {
|
||||
t.Errorf("err = nil, want killed error")
|
||||
}
|
||||
_, err = os.FindProcess(npid)
|
||||
if err != nil {
|
||||
t.Errorf("unexpected error %v when finding process %d", err, npid)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRPCTerminate(t *testing.T) {
|
||||
c, err := rpc.DialHTTP("tcp", ":9027")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
dir, err := ioutil.TempDir(os.TempDir(), "etcd-agent")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
var pid int
|
||||
err = c.Call("Agent.RPCStart", []string{"--data-dir", dir}, &pid)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
err = c.Call("Agent.RPCTerminate", struct{}{}, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, err := os.Stat(dir); !os.IsNotExist(err) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRPCStatus(t *testing.T) {
|
||||
c, err := rpc.DialHTTP("tcp", ":9027")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
var s client.Status
|
||||
err = c.Call("Agent.RPCStatus", struct{}{}, &s)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if s.State != stateTerminated {
|
||||
t.Errorf("state = %s, want %s", s.State, stateTerminated)
|
||||
}
|
||||
|
||||
dir, err := ioutil.TempDir(os.TempDir(), "etcd-agent")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
var pid int
|
||||
err = c.Call("Agent.RPCStart", []string{"--data-dir", dir}, &pid)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
err = c.Call("Agent.RPCStatus", struct{}{}, &s)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if s.State != stateStarted {
|
||||
t.Errorf("state = %s, want %s", s.State, stateStarted)
|
||||
}
|
||||
|
||||
err = c.Call("Agent.RPCTerminate", struct{}{}, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
@ -1,144 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package command
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/coreos/etcd/clientv3/concurrency"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// NewElectionCommand returns the cobra command for "election runner".
|
||||
func NewElectionCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "election [election name (defaults to 'elector')]",
|
||||
Short: "Performs election operation",
|
||||
Run: runElectionFunc,
|
||||
}
|
||||
cmd.Flags().IntVar(&totalClientConnections, "total-client-connections", 10, "total number of client connections")
|
||||
return cmd
|
||||
}
|
||||
|
||||
func runElectionFunc(cmd *cobra.Command, args []string) {
|
||||
election := "elector"
|
||||
if len(args) == 1 {
|
||||
election = args[0]
|
||||
}
|
||||
if len(args) > 1 {
|
||||
ExitWithError(ExitBadArgs, errors.New("election takes at most one argument"))
|
||||
}
|
||||
|
||||
rcs := make([]roundClient, totalClientConnections)
|
||||
validatec := make(chan struct{}, len(rcs))
|
||||
// nextc closes when election is ready for next round.
|
||||
nextc := make(chan struct{})
|
||||
eps := endpointsFromFlag(cmd)
|
||||
|
||||
for i := range rcs {
|
||||
v := fmt.Sprintf("%d", i)
|
||||
observedLeader := ""
|
||||
validateWaiters := 0
|
||||
var rcNextc chan struct{}
|
||||
setRcNextc := func() {
|
||||
rcNextc = nextc
|
||||
}
|
||||
|
||||
rcs[i].c = newClient(eps, dialTimeout)
|
||||
var (
|
||||
s *concurrency.Session
|
||||
err error
|
||||
)
|
||||
for {
|
||||
s, err = concurrency.NewSession(rcs[i].c)
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
e := concurrency.NewElection(s, election)
|
||||
rcs[i].acquire = func() (err error) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
donec := make(chan struct{})
|
||||
go func() {
|
||||
defer close(donec)
|
||||
for ctx.Err() == nil {
|
||||
if ol, ok := <-e.Observe(ctx); ok {
|
||||
observedLeader = string(ol.Kvs[0].Value)
|
||||
break
|
||||
}
|
||||
}
|
||||
if observedLeader != v {
|
||||
cancel()
|
||||
}
|
||||
}()
|
||||
err = e.Campaign(ctx, v)
|
||||
cancel()
|
||||
<-donec
|
||||
if err == nil {
|
||||
observedLeader = v
|
||||
}
|
||||
if observedLeader == v {
|
||||
validateWaiters = len(rcs)
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil
|
||||
default:
|
||||
return err
|
||||
}
|
||||
}
|
||||
rcs[i].validate = func() error {
|
||||
l, err := e.Leader(context.TODO())
|
||||
if err == nil && string(l.Kvs[0].Value) != observedLeader {
|
||||
return fmt.Errorf("expected leader %q, got %q", observedLeader, l.Kvs[0].Value)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
setRcNextc()
|
||||
validatec <- struct{}{}
|
||||
return nil
|
||||
}
|
||||
rcs[i].release = func() error {
|
||||
for validateWaiters > 0 {
|
||||
select {
|
||||
case <-validatec:
|
||||
validateWaiters--
|
||||
default:
|
||||
return fmt.Errorf("waiting on followers")
|
||||
}
|
||||
}
|
||||
if err := e.Resign(context.TODO()); err != nil {
|
||||
return err
|
||||
}
|
||||
if observedLeader == v {
|
||||
oldNextc := nextc
|
||||
nextc = make(chan struct{})
|
||||
close(oldNextc)
|
||||
|
||||
}
|
||||
<-rcNextc
|
||||
observedLeader = ""
|
||||
return nil
|
||||
}
|
||||
}
|
||||
// each client creates 1 key with Campaign() and deletes it with Resign(),
|
||||
// so a round involves 2*len(rcs) requests.
|
||||
doRounds(rcs, rounds, 2*len(rcs))
|
||||
}
|
@ -1,42 +0,0 @@
|
||||
// Copyright 2015 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package command
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/coreos/etcd/client"
|
||||
)
|
||||
|
||||
const (
|
||||
// http://tldp.org/LDP/abs/html/exitcodes.html
|
||||
ExitSuccess = iota
|
||||
ExitError
|
||||
ExitBadConnection
|
||||
ExitInvalidInput // for txn, watch command
|
||||
ExitBadFeature // provided a valid flag with an unsupported value
|
||||
ExitInterrupted
|
||||
ExitIO
|
||||
ExitBadArgs = 128
|
||||
)
|
||||
|
||||
func ExitWithError(code int, err error) {
|
||||
fmt.Fprintln(os.Stderr, "Error: ", err)
|
||||
if cerr, ok := err.(*client.ClusterError); ok {
|
||||
fmt.Fprintln(os.Stderr, cerr.Detail())
|
||||
}
|
||||
os.Exit(code)
|
||||
}
|
@ -1,114 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package command
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/coreos/etcd/clientv3"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"golang.org/x/time/rate"
|
||||
)
|
||||
|
||||
// shared flags
|
||||
var (
|
||||
totalClientConnections int // total number of client connections to be made with server
|
||||
endpoints []string
|
||||
dialTimeout time.Duration
|
||||
rounds int // total number of rounds to run; set to <= 0 to run forever.
|
||||
reqRate int // maximum number of requests per second.
|
||||
)
|
||||
|
||||
type roundClient struct {
|
||||
c *clientv3.Client
|
||||
progress int
|
||||
acquire func() error
|
||||
validate func() error
|
||||
release func() error
|
||||
}
|
||||
|
||||
func newClient(eps []string, timeout time.Duration) *clientv3.Client {
|
||||
c, err := clientv3.New(clientv3.Config{
|
||||
Endpoints: eps,
|
||||
DialTimeout: time.Duration(timeout) * time.Second,
|
||||
})
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
func doRounds(rcs []roundClient, rounds int, requests int) {
|
||||
var wg sync.WaitGroup
|
||||
|
||||
wg.Add(len(rcs))
|
||||
finished := make(chan struct{})
|
||||
limiter := rate.NewLimiter(rate.Limit(reqRate), reqRate)
|
||||
for i := range rcs {
|
||||
go func(rc *roundClient) {
|
||||
defer wg.Done()
|
||||
for rc.progress < rounds || rounds <= 0 {
|
||||
if err := limiter.WaitN(context.Background(), requests/len(rcs)); err != nil {
|
||||
log.Panicf("rate limiter error %v", err)
|
||||
}
|
||||
|
||||
for rc.acquire() != nil { /* spin */
|
||||
}
|
||||
|
||||
if err := rc.validate(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
rc.progress++
|
||||
finished <- struct{}{}
|
||||
|
||||
for rc.release() != nil { /* spin */
|
||||
}
|
||||
}
|
||||
}(&rcs[i])
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
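// count finished rounds from all clients, logging progress every 100 and panicking if none finish for a minute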
for i := 1; i < len(rcs)*rounds+1 || rounds <= 0; i++ {
|
||||
select {
|
||||
case <-finished:
|
||||
if i%100 == 0 {
|
||||
fmt.Printf("finished %d, took %v\n", i, time.Since(start))
|
||||
start = time.Now()
|
||||
}
|
||||
case <-time.After(time.Minute):
|
||||
log.Panic("no progress after 1 minute!")
|
||||
}
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
for _, rc := range rcs {
|
||||
rc.c.Close()
|
||||
}
|
||||
}
|
||||
|
||||
func endpointsFromFlag(cmd *cobra.Command) []string {
|
||||
endpoints, err := cmd.Flags().GetStringSlice("endpoints")
|
||||
if err != nil {
|
||||
ExitWithError(ExitError, err)
|
||||
}
|
||||
return endpoints
|
||||
}
|
@ -1,174 +0,0 @@
|
||||
// Copyright 2015 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// copied from https://github.com/rkt/rkt/blob/master/rkt/help.go
|
||||
|
||||
package command
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
"text/tabwriter"
|
||||
"text/template"
|
||||
|
||||
"github.com/coreos/etcd/version"
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/spf13/pflag"
|
||||
)
|
||||
|
||||
var (
|
||||
commandUsageTemplate *template.Template
|
||||
templFuncs = template.FuncMap{
|
||||
"descToLines": func(s string) []string {
|
||||
// trim leading/trailing whitespace and split into slice of lines
|
||||
return strings.Split(strings.Trim(s, "\n\t "), "\n")
|
||||
},
|
||||
"cmdName": func(cmd *cobra.Command, startCmd *cobra.Command) string {
|
||||
parts := []string{cmd.Name()}
|
||||
for cmd.HasParent() && cmd.Parent().Name() != startCmd.Name() {
|
||||
cmd = cmd.Parent()
|
||||
parts = append([]string{cmd.Name()}, parts...)
|
||||
}
|
||||
return strings.Join(parts, " ")
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
func init() {
|
||||
commandUsage := `
|
||||
{{ $cmd := .Cmd }}\
|
||||
{{ $cmdname := cmdName .Cmd .Cmd.Root }}\
|
||||
NAME:
|
||||
{{ if not .Cmd.HasParent }}\
|
||||
{{printf "\t%s - %s" .Cmd.Name .Cmd.Short}}
|
||||
{{else}}\
|
||||
{{printf "\t%s - %s" $cmdname .Cmd.Short}}
|
||||
{{end}}\
|
||||
|
||||
USAGE:
|
||||
{{printf "\t%s" .Cmd.UseLine}}
|
||||
{{ if not .Cmd.HasParent }}\
|
||||
|
||||
VERSION:
|
||||
{{printf "\t%s" .Version}}
|
||||
{{end}}\
|
||||
{{if .Cmd.HasSubCommands}}\
|
||||
|
||||
API VERSION:
|
||||
{{printf "\t%s" .APIVersion}}
|
||||
{{end}}\
|
||||
{{if .Cmd.HasSubCommands}}\
|
||||
|
||||
|
||||
COMMANDS:
|
||||
{{range .SubCommands}}\
|
||||
{{ $cmdname := cmdName . $cmd }}\
|
||||
{{ if .Runnable }}\
|
||||
{{printf "\t%s\t%s" $cmdname .Short}}
|
||||
{{end}}\
|
||||
{{end}}\
|
||||
{{end}}\
|
||||
{{ if .Cmd.Long }}\
|
||||
|
||||
DESCRIPTION:
|
||||
{{range $line := descToLines .Cmd.Long}}{{printf "\t%s" $line}}
|
||||
{{end}}\
|
||||
{{end}}\
|
||||
{{if .Cmd.HasLocalFlags}}\
|
||||
|
||||
OPTIONS:
|
||||
{{.LocalFlags}}\
|
||||
{{end}}\
|
||||
{{if .Cmd.HasInheritedFlags}}\
|
||||
|
||||
GLOBAL OPTIONS:
|
||||
{{.GlobalFlags}}\
|
||||
{{end}}
|
||||
`[1:]
|
||||
|
||||
commandUsageTemplate = template.Must(template.New("command_usage").Funcs(templFuncs).Parse(strings.Replace(commandUsage, "\\\n", "", -1)))
|
||||
}
|
||||
|
||||
func etcdFlagUsages(flagSet *pflag.FlagSet) string {
|
||||
x := new(bytes.Buffer)
|
||||
|
||||
flagSet.VisitAll(func(flag *pflag.Flag) {
|
||||
if len(flag.Deprecated) > 0 {
|
||||
return
|
||||
}
|
||||
var format string
|
||||
if len(flag.Shorthand) > 0 {
|
||||
format = " -%s, --%s"
|
||||
} else {
|
||||
format = " %s --%s"
|
||||
}
|
||||
if len(flag.NoOptDefVal) > 0 {
|
||||
format = format + "["
|
||||
}
|
||||
if flag.Value.Type() == "string" {
|
||||
// put quotes on the value
|
||||
format = format + "=%q"
|
||||
} else {
|
||||
format = format + "=%s"
|
||||
}
|
||||
if len(flag.NoOptDefVal) > 0 {
|
||||
format = format + "]"
|
||||
}
|
||||
format = format + "\t%s\n"
|
||||
shorthand := flag.Shorthand
|
||||
fmt.Fprintf(x, format, shorthand, flag.Name, flag.DefValue, flag.Usage)
|
||||
})
|
||||
|
||||
return x.String()
|
||||
}
|
||||
|
||||
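// getSubCommands recursively collects all descendant subcommands of cmd into a flat slice.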
func getSubCommands(cmd *cobra.Command) []*cobra.Command {
|
||||
var subCommands []*cobra.Command
|
||||
for _, subCmd := range cmd.Commands() {
|
||||
subCommands = append(subCommands, subCmd)
|
||||
subCommands = append(subCommands, getSubCommands(subCmd)...)
|
||||
}
|
||||
return subCommands
|
||||
}
|
||||
|
||||
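// usageFunc renders the rkt-style usage template for cmd, its subcommands, and its
// local and inherited flags through a tab writer on stdout.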
func usageFunc(cmd *cobra.Command) error {
|
||||
subCommands := getSubCommands(cmd)
|
||||
tabOut := getTabOutWithWriter(os.Stdout)
|
||||
commandUsageTemplate.Execute(tabOut, struct {
|
||||
Cmd *cobra.Command
|
||||
LocalFlags string
|
||||
GlobalFlags string
|
||||
SubCommands []*cobra.Command
|
||||
Version string
|
||||
APIVersion string
|
||||
}{
|
||||
cmd,
|
||||
etcdFlagUsages(cmd.LocalFlags()),
|
||||
etcdFlagUsages(cmd.InheritedFlags()),
|
||||
subCommands,
|
||||
version.Version,
|
||||
version.APIVersion,
|
||||
})
|
||||
tabOut.Flush()
|
||||
return nil
|
||||
}
|
||||
|
||||
func getTabOutWithWriter(writer io.Writer) *tabwriter.Writer {
|
||||
aTabOut := new(tabwriter.Writer)
|
||||
aTabOut.Init(writer, 0, 8, 1, '\t', 0)
|
||||
return aTabOut
|
||||
}
|
@ -1,92 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package command
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/coreos/etcd/clientv3"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
)
|
||||
|
||||
var (
|
||||
leaseTTL int64
|
||||
)
|
||||
|
||||
// NewLeaseRenewerCommand returns the cobra command for "lease-renewer runner".
|
||||
func NewLeaseRenewerCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "lease-renewer",
|
||||
Short: "Performs lease renew operation",
|
||||
Run: runLeaseRenewerFunc,
|
||||
}
|
||||
cmd.Flags().Int64Var(&leaseTTL, "ttl", 5, "lease's ttl")
|
||||
return cmd
|
||||
}
|
||||
|
||||
func runLeaseRenewerFunc(cmd *cobra.Command, args []string) {
|
||||
if len(args) > 0 {
|
||||
ExitWithError(ExitBadArgs, errors.New("lease-renewer does not take any argument"))
|
||||
}
|
||||
|
||||
eps := endpointsFromFlag(cmd)
|
||||
c := newClient(eps, dialTimeout)
|
||||
ctx := context.Background()
|
||||
|
||||
for {
|
||||
var (
|
||||
l *clientv3.LeaseGrantResponse
|
||||
lk *clientv3.LeaseKeepAliveResponse
|
||||
err error
|
||||
)
|
||||
for {
|
||||
l, err = c.Lease.Grant(ctx, leaseTTL)
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
expire := time.Now().Add(time.Duration(l.TTL-1) * time.Second)
|
||||
|
||||
for {
|
||||
lk, err = c.Lease.KeepAliveOnce(ctx, l.ID)
|
||||
ev, _ := status.FromError(err)
|
||||
if ev.Code() == codes.NotFound {
|
||||
if time.Since(expire) < 0 {
|
||||
log.Fatalf("bad renew! exceeded: %v", time.Since(expire))
|
||||
for {
|
||||
lk, err = c.Lease.KeepAliveOnce(ctx, l.ID)
|
||||
fmt.Println(lk, err)
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
}
|
||||
log.Fatalf("lost lease %d, expire: %v\n", l.ID, expire)
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
expire = time.Now().Add(time.Duration(lk.TTL-1) * time.Second)
|
||||
log.Printf("renewed lease %d, expire: %v\n", lk.ID, expire)
|
||||
time.Sleep(time.Duration(lk.TTL-2) * time.Second)
|
||||
}
|
||||
}
|
||||
}
|
@ -1,94 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package command
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"github.com/coreos/etcd/clientv3/concurrency"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
// NewLockRacerCommand returns the cobra command for "lock-racer runner".
|
||||
func NewLockRacerCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "lock-racer [name of lock (defaults to 'racers')]",
|
||||
Short: "Performs lock race operation",
|
||||
Run: runRacerFunc,
|
||||
}
|
||||
cmd.Flags().IntVar(&totalClientConnections, "total-client-connections", 10, "total number of client connections")
|
||||
return cmd
|
||||
}
|
||||
|
||||
func runRacerFunc(cmd *cobra.Command, args []string) {
|
||||
racers := "racers"
|
||||
if len(args) == 1 {
|
||||
racers = args[0]
|
||||
}
|
||||
|
||||
if len(args) > 1 {
|
||||
ExitWithError(ExitBadArgs, errors.New("lock-racer takes at most one argument"))
|
||||
}
|
||||
|
||||
rcs := make([]roundClient, totalClientConnections)
|
||||
ctx := context.Background()
|
||||
// mu ensures validate and release funcs are atomic.
|
||||
var mu sync.Mutex
|
||||
cnt := 0
|
||||
|
||||
eps := endpointsFromFlag(cmd)
|
||||
|
||||
for i := range rcs {
|
||||
var (
|
||||
s *concurrency.Session
|
||||
err error
|
||||
)
|
||||
|
||||
rcs[i].c = newClient(eps, dialTimeout)
|
||||
|
||||
for {
|
||||
s, err = concurrency.NewSession(rcs[i].c)
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
m := concurrency.NewMutex(s, racers)
|
||||
rcs[i].acquire = func() error { return m.Lock(ctx) }
|
||||
rcs[i].validate = func() error {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
if cnt++; cnt != 1 {
|
||||
return fmt.Errorf("bad lock; count: %d", cnt)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
rcs[i].release = func() error {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
if err := m.Unlock(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
cnt = 0
|
||||
return nil
|
||||
}
|
||||
}
|
||||
// each client creates 1 key in NewMutex() and deletes it in Unlock(),
// so a round involves 2*len(rcs) requests.
|
||||
doRounds(rcs, rounds, 2*len(rcs))
|
||||
}
|
@ -1,70 +0,0 @@
|
||||
// Copyright 2017 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package command implements individual etcd-runner commands for the etcd-runner utility.
|
||||
package command
|
||||
|
||||
import (
|
||||
"log"
|
||||
"math/rand"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
const (
|
||||
cliName = "etcd-runner"
|
||||
cliDescription = "Stress tests using clientv3 functionality.."
|
||||
|
||||
defaultDialTimeout = 2 * time.Second
|
||||
)
|
||||
|
||||
var (
|
||||
rootCmd = &cobra.Command{
|
||||
Use: cliName,
|
||||
Short: cliDescription,
|
||||
SuggestFor: []string{"etcd-runner"},
|
||||
}
|
||||
)
|
||||
|
||||
func init() {
|
||||
cobra.EnablePrefixMatching = true
|
||||
|
||||
rand.Seed(time.Now().UnixNano())
|
||||
|
||||
log.SetFlags(log.Lmicroseconds)
|
||||
|
||||
rootCmd.PersistentFlags().StringSliceVar(&endpoints, "endpoints", []string{"127.0.0.1:2379"}, "gRPC endpoints")
|
||||
rootCmd.PersistentFlags().DurationVar(&dialTimeout, "dial-timeout", defaultDialTimeout, "dial timeout for client connections")
|
||||
rootCmd.PersistentFlags().IntVar(&reqRate, "req-rate", 30, "maximum number of requests per second")
|
||||
rootCmd.PersistentFlags().IntVar(&rounds, "rounds", 100, "number of rounds to run; 0 to run forever")
|
||||
|
||||
rootCmd.AddCommand(
|
||||
NewElectionCommand(),
|
||||
NewLeaseRenewerCommand(),
|
||||
NewLockRacerCommand(),
|
||||
NewWatchCommand(),
|
||||
)
|
||||
}
|
||||
|
||||
func Start() {
|
||||
rootCmd.SetUsageFunc(usageFunc)
|
||||
|
||||
// Make help just show the usage
|
||||
rootCmd.SetHelpTemplate(`{{.UsageString}}`)
|
||||
|
||||
if err := rootCmd.Execute(); err != nil {
|
||||
ExitWithError(ExitError, err)
|
||||
}
|
||||
}
|
@ -1,210 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package command
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/coreos/etcd/clientv3"
|
||||
"github.com/coreos/etcd/pkg/stringutil"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"golang.org/x/time/rate"
|
||||
)
|
||||
|
||||
var (
|
||||
runningTime time.Duration // time for which operation should be performed
|
||||
noOfPrefixes int // total number of prefixes which will be watched upon
|
||||
watchPerPrefix int // number of watchers per prefix
|
||||
watchPrefix string // prefix appended to keys in the watcher
|
||||
totalKeys int // total number of keys for operation
|
||||
)
|
||||
|
||||
// NewWatchCommand returns the cobra command for "watcher runner".
|
||||
func NewWatchCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "watcher",
|
||||
Short: "Performs watch operation",
|
||||
Run: runWatcherFunc,
|
||||
}
|
||||
cmd.Flags().DurationVar(&runningTime, "running-time", 60, "number of seconds to run")
|
||||
cmd.Flags().StringVar(&watchPrefix, "prefix", "", "the prefix to append on all keys")
|
||||
cmd.Flags().IntVar(&noOfPrefixes, "total-prefixes", 10, "total no of prefixes to use")
|
||||
cmd.Flags().IntVar(&watchPerPrefix, "watch-per-prefix", 10, "number of watchers per prefix")
|
||||
cmd.Flags().IntVar(&totalKeys, "total-keys", 1000, "total number of keys to watch")
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
func runWatcherFunc(cmd *cobra.Command, args []string) {
|
||||
if len(args) > 0 {
|
||||
ExitWithError(ExitBadArgs, errors.New("watcher does not take any argument"))
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
for round := 0; round < rounds || rounds <= 0; round++ {
|
||||
fmt.Println("round", round)
|
||||
performWatchOnPrefixes(ctx, cmd, round)
|
||||
}
|
||||
}
|
||||
|
||||
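// performWatchOnPrefixes puts rate-limited keys under generated prefixes while watchers,
// opened from the revision observed before the puts, verify every event; it then cancels
// the watchers, checks that their channels close, and deletes the keys.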
func performWatchOnPrefixes(ctx context.Context, cmd *cobra.Command, round int) {
|
||||
keyPerPrefix := totalKeys / noOfPrefixes
|
||||
prefixes := stringutil.UniqueStrings(5, noOfPrefixes)
|
||||
keys := stringutil.RandomStrings(10, keyPerPrefix)
|
||||
|
||||
roundPrefix := fmt.Sprintf("%16x", round)
|
||||
|
||||
eps := endpointsFromFlag(cmd)
|
||||
|
||||
var (
|
||||
revision int64
|
||||
wg sync.WaitGroup
|
||||
gr *clientv3.GetResponse
|
||||
err error
|
||||
)
|
||||
|
||||
client := newClient(eps, dialTimeout)
|
||||
defer client.Close()
|
||||
|
||||
gr, err = getKey(ctx, client, "non-existent")
|
||||
if err != nil {
|
||||
log.Fatalf("failed to get the initial revision: %v", err)
|
||||
}
|
||||
revision = gr.Header.Revision
|
||||
|
||||
ctxt, cancel := context.WithDeadline(ctx, time.Now().Add(runningTime*time.Second))
|
||||
defer cancel()
|
||||
|
||||
// generate and put keys in cluster
|
||||
limiter := rate.NewLimiter(rate.Limit(reqRate), reqRate)
|
||||
|
||||
go func() {
|
||||
for _, key := range keys {
|
||||
for _, prefix := range prefixes {
|
||||
if err = limiter.Wait(ctxt); err != nil {
|
||||
return
|
||||
}
|
||||
if err = putKeyAtMostOnce(ctxt, client, watchPrefix+"-"+roundPrefix+"-"+prefix+"-"+key); err != nil {
|
||||
log.Fatalf("failed to put key: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
ctxc, cancelc := context.WithCancel(ctx)
|
||||
|
||||
wcs := make([]clientv3.WatchChan, 0)
|
||||
rcs := make([]*clientv3.Client, 0)
|
||||
|
||||
for _, prefix := range prefixes {
|
||||
for j := 0; j < watchPerPrefix; j++ {
|
||||
rc := newClient(eps, dialTimeout)
|
||||
rcs = append(rcs, rc)
|
||||
|
||||
wprefix := watchPrefix + "-" + roundPrefix + "-" + prefix
|
||||
|
||||
wc := rc.Watch(ctxc, wprefix, clientv3.WithPrefix(), clientv3.WithRev(revision))
|
||||
wcs = append(wcs, wc)
|
||||
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
checkWatchResponse(wc, wprefix, keys)
|
||||
}()
|
||||
}
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
cancelc()
|
||||
|
||||
// verify all watch channels are closed
|
||||
for e, wc := range wcs {
|
||||
if _, ok := <-wc; ok {
|
||||
log.Fatalf("expected wc to be closed, but received %v", e)
|
||||
}
|
||||
}
|
||||
|
||||
for _, rc := range rcs {
|
||||
rc.Close()
|
||||
}
|
||||
|
||||
if err = deletePrefix(ctx, client, watchPrefix); err != nil {
|
||||
log.Fatalf("failed to clean up keys after test: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
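// checkWatchResponse consumes events from wc until one event per key has been observed,
// fatally logging if the channel closes early or a key arrives out of the expected order.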
func checkWatchResponse(wc clientv3.WatchChan, prefix string, keys []string) {
|
||||
for n := 0; n < len(keys); {
|
||||
wr, more := <-wc
|
||||
if !more {
|
||||
log.Fatalf("expect more keys (received %d/%d) for %s", n, len(keys), prefix)
|
||||
}
|
||||
for _, event := range wr.Events {
|
||||
expectedKey := prefix + "-" + keys[n]
|
||||
receivedKey := string(event.Kv.Key)
|
||||
if expectedKey != receivedKey {
|
||||
log.Fatalf("expected key %q, got %q for prefix : %q\n", expectedKey, receivedKey, prefix)
|
||||
}
|
||||
n++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
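// putKeyAtMostOnce writes key at most once: the put is guarded by a txn comparing the
// key's ModRevision against the value observed beforehand, so retries after an ambiguous
// failure cannot apply the write a second time.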
func putKeyAtMostOnce(ctx context.Context, client *clientv3.Client, key string) error {
|
||||
gr, err := getKey(ctx, client, key)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var modrev int64
|
||||
if len(gr.Kvs) > 0 {
|
||||
modrev = gr.Kvs[0].ModRevision
|
||||
}
|
||||
|
||||
for ctx.Err() == nil {
|
||||
_, err := client.Txn(ctx).If(clientv3.Compare(clientv3.ModRevision(key), "=", modrev)).Then(clientv3.OpPut(key, key)).Commit()
|
||||
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
func deletePrefix(ctx context.Context, client *clientv3.Client, key string) error {
|
||||
for ctx.Err() == nil {
|
||||
if _, err := client.Delete(ctx, key, clientv3.WithPrefix()); err == nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
func getKey(ctx context.Context, client *clientv3.Client, key string) (*clientv3.GetResponse, error) {
|
||||
for ctx.Err() == nil {
|
||||
if gr, err := client.Get(ctx, key); err == nil {
|
||||
return gr, nil
|
||||
}
|
||||
}
|
||||
return nil, ctx.Err()
|
||||
}
|
@ -1,16 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// etcd-runner is a program for testing etcd clientv3 features against a fault-injected cluster.
package main
@ -1,22 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// etcd-runner is a command line application that performs tests on etcd.
package main

import "github.com/coreos/etcd/tools/functional-tester/etcd-runner/command"

func main() {
	command.Start()
}
|
@ -1,264 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/coreos/etcd/clientv3"
|
||||
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
|
||||
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
|
||||
|
||||
"google.golang.org/grpc"
|
||||
)
|
||||
|
||||
const (
|
||||
retries = 7
|
||||
)
|
||||
|
||||
type Checker interface {
|
||||
// Check returns an error if the system fails a consistency check.
|
||||
Check() error
|
||||
}
|
||||
|
||||
type hashAndRevGetter interface {
|
||||
getRevisionHash() (revs map[string]int64, hashes map[string]int64, err error)
|
||||
}
|
||||
|
||||
type hashChecker struct {
|
||||
hrg hashAndRevGetter
|
||||
}
|
||||
|
||||
func newHashChecker(hrg hashAndRevGetter) Checker { return &hashChecker{hrg} }
|
||||
|
||||
const leaseCheckerTimeout = 10 * time.Second
|
||||
|
||||
func (hc *hashChecker) checkRevAndHashes() (err error) {
|
||||
var (
|
||||
revs map[string]int64
|
||||
hashes map[string]int64
|
||||
)
|
||||
|
||||
// retry in case of transient failure or if the etcd cluster has not stabilized yet.
|
||||
for i := 0; i < retries; i++ {
|
||||
revs, hashes, err = hc.hrg.getRevisionHash()
|
||||
if err != nil {
|
||||
plog.Warningf("retry %d. failed to retrieve revison and hash (%v)", i, err)
|
||||
} else {
|
||||
sameRev := getSameValue(revs)
|
||||
sameHashes := getSameValue(hashes)
|
||||
if sameRev && sameHashes {
|
||||
return nil
|
||||
}
|
||||
plog.Warningf("retry %d. etcd cluster is not stable: [revisions: %v] and [hashes: %v]", i, revs, hashes)
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed revision and hash check (%v)", err)
|
||||
}
|
||||
|
||||
return fmt.Errorf("etcd cluster is not stable: [revisions: %v] and [hashes: %v]", revs, hashes)
|
||||
}
|
||||
|
||||
func (hc *hashChecker) Check() error {
|
||||
return hc.checkRevAndHashes()
|
||||
}
|
||||
|
||||
type leaseChecker struct {
|
||||
endpoint string
|
||||
ls *leaseStresser
|
||||
leaseClient pb.LeaseClient
|
||||
kvc pb.KVClient
|
||||
}
|
||||
|
||||
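// Check dials the stressed member's endpoint and verifies that revoked leases are gone,
// alive leases still exist, and short-lived leases eventually expire.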
func (lc *leaseChecker) Check() error {
|
||||
conn, err := grpc.Dial(lc.ls.endpoint, grpc.WithInsecure(), grpc.WithBackoffMaxDelay(1))
|
||||
if err != nil {
|
||||
return fmt.Errorf("%v (%s)", err, lc.ls.endpoint)
|
||||
}
|
||||
defer func() {
|
||||
if conn != nil {
|
||||
conn.Close()
|
||||
}
|
||||
}()
|
||||
lc.kvc = pb.NewKVClient(conn)
|
||||
lc.leaseClient = pb.NewLeaseClient(conn)
|
||||
if err := lc.check(true, lc.ls.revokedLeases.leases); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := lc.check(false, lc.ls.aliveLeases.leases); err != nil {
|
||||
return err
|
||||
}
|
||||
return lc.checkShortLivedLeases()
|
||||
}
|
||||
|
||||
// checkShortLivedLeases ensures leases expire.
|
||||
func (lc *leaseChecker) checkShortLivedLeases() error {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), leaseCheckerTimeout)
|
||||
errc := make(chan error)
|
||||
defer cancel()
|
||||
for leaseID := range lc.ls.shortLivedLeases.leases {
|
||||
go func(id int64) {
|
||||
errc <- lc.checkShortLivedLease(ctx, id)
|
||||
}(leaseID)
|
||||
}
|
||||
|
||||
var errs []error
|
||||
for range lc.ls.shortLivedLeases.leases {
|
||||
if err := <-errc; err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
return errsToError(errs)
|
||||
}
|
||||
|
||||
func (lc *leaseChecker) checkShortLivedLease(ctx context.Context, leaseID int64) (err error) {
|
||||
// retry in case of transient failure, or if the lease is expired but not yet revoked because the etcd cluster has not had enough time to delete it.
|
||||
var resp *pb.LeaseTimeToLiveResponse
|
||||
for i := 0; i < retries; i++ {
|
||||
resp, err = lc.getLeaseByID(ctx, leaseID)
|
||||
// lease not found; for pre-v3.1 compatibility, also check ErrLeaseNotFound
|
||||
if (err == nil && resp.TTL == -1) || (err != nil && rpctypes.Error(err) == rpctypes.ErrLeaseNotFound) {
|
||||
return nil
|
||||
}
|
||||
if err != nil {
|
||||
plog.Debugf("retry %d. failed to retrieve lease %v error (%v)", i, leaseID, err)
|
||||
continue
|
||||
}
|
||||
if resp.TTL > 0 {
|
||||
plog.Debugf("lease %v is not expired. sleep for %d until it expires.", leaseID, resp.TTL)
|
||||
time.Sleep(time.Duration(resp.TTL) * time.Second)
|
||||
} else {
|
||||
plog.Debugf("retry %d. lease %v is expired but not yet revoked", i, leaseID)
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
if err = lc.checkLease(ctx, false, leaseID); err != nil {
|
||||
continue
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (lc *leaseChecker) checkLease(ctx context.Context, expired bool, leaseID int64) error {
|
||||
keysExpired, err := lc.hasKeysAttachedToLeaseExpired(ctx, leaseID)
|
||||
if err != nil {
|
||||
plog.Errorf("hasKeysAttachedToLeaseExpired error %v (endpoint %q)", err, lc.endpoint)
|
||||
return err
|
||||
}
|
||||
leaseExpired, err := lc.hasLeaseExpired(ctx, leaseID)
|
||||
if err != nil {
|
||||
plog.Errorf("hasLeaseExpired error %v (endpoint %q)", err, lc.endpoint)
|
||||
return err
|
||||
}
|
||||
if leaseExpired != keysExpired {
|
||||
return fmt.Errorf("lease %v expiration mismatch (lease expired=%v, keys expired=%v)", leaseID, leaseExpired, keysExpired)
|
||||
}
|
||||
if leaseExpired != expired {
|
||||
return fmt.Errorf("lease %v expected expired=%v, got %v", leaseID, expired, leaseExpired)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (lc *leaseChecker) check(expired bool, leases map[int64]time.Time) error {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), leaseCheckerTimeout)
|
||||
defer cancel()
|
||||
for leaseID := range leases {
|
||||
if err := lc.checkLease(ctx, expired, leaseID); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (lc *leaseChecker) getLeaseByID(ctx context.Context, leaseID int64) (*pb.LeaseTimeToLiveResponse, error) {
|
||||
ltl := &pb.LeaseTimeToLiveRequest{ID: leaseID, Keys: true}
|
||||
return lc.leaseClient.LeaseTimeToLive(ctx, ltl, grpc.FailFast(false))
|
||||
}
|
||||
|
||||
func (lc *leaseChecker) hasLeaseExpired(ctx context.Context, leaseID int64) (bool, error) {
|
||||
// keep retrying until the lease's state is known or ctx is canceled
|
||||
for ctx.Err() == nil {
|
||||
resp, err := lc.getLeaseByID(ctx, leaseID)
|
||||
if err != nil {
|
||||
// for pre-v3.1 compatibility
|
||||
if rpctypes.Error(err) == rpctypes.ErrLeaseNotFound {
|
||||
return true, nil
|
||||
}
|
||||
} else {
|
||||
return resp.TTL == -1, nil
|
||||
}
|
||||
plog.Warningf("hasLeaseExpired %v resp %v error %v (endpoint %q)", leaseID, resp, err, lc.endpoint)
|
||||
}
|
||||
return false, ctx.Err()
|
||||
}
|
||||
|
||||
// Keys attached to a lease have the format "<leaseID>_<idx>", where idx is the key creation order.
// Since the key format embeds the leaseID, ranging over the "<leaseID>" prefix
// determines whether the keys attached to a given leaseID have been deleted.
|
||||
func (lc *leaseChecker) hasKeysAttachedToLeaseExpired(ctx context.Context, leaseID int64) (bool, error) {
|
||||
resp, err := lc.kvc.Range(ctx, &pb.RangeRequest{
|
||||
Key: []byte(fmt.Sprintf("%d", leaseID)),
|
||||
RangeEnd: []byte(clientv3.GetPrefixRangeEnd(fmt.Sprintf("%d", leaseID))),
|
||||
}, grpc.FailFast(false))
|
||||
if err != nil {
|
||||
plog.Errorf("retrieving keys attached to lease %v error %v (endpoint %q)", leaseID, err, lc.endpoint)
|
||||
return false, err
|
||||
}
|
||||
return len(resp.Kvs) == 0, nil
|
||||
}
|
||||
|
||||
// compositeChecker implements a checker that runs a slice of Checkers concurrently.
|
||||
type compositeChecker struct{ checkers []Checker }
|
||||
|
||||
func newCompositeChecker(checkers []Checker) Checker {
|
||||
return &compositeChecker{checkers}
|
||||
}
|
||||
|
||||
func (cchecker *compositeChecker) Check() error {
|
||||
errc := make(chan error)
|
||||
for _, c := range cchecker.checkers {
|
||||
go func(chk Checker) { errc <- chk.Check() }(c)
|
||||
}
|
||||
var errs []error
|
||||
for range cchecker.checkers {
|
||||
if err := <-errc; err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
return errsToError(errs)
|
||||
}
|
||||
|
||||
type runnerChecker struct {
|
||||
errc chan error
|
||||
}
|
||||
|
||||
func (rc *runnerChecker) Check() error {
|
||||
select {
|
||||
case err := <-rc.errc:
|
||||
return err
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
type noChecker struct{}
|
||||
|
||||
func newNoChecker() Checker { return &noChecker{} }
|
||||
func (nc *noChecker) Check() error { return nil }
|
@ -1,261 +0,0 @@
|
||||
// Copyright 2015 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"net"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
|
||||
"github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
|
||||
|
||||
"google.golang.org/grpc"
|
||||
)
|
||||
|
||||
// agentConfig holds the information needed to interact with and configure an agent and its etcd process
|
||||
type agentConfig struct {
|
||||
endpoint string
|
||||
clientPort int
|
||||
advertiseClientPort int
|
||||
peerPort int
|
||||
advertisePeerPort int
|
||||
failpointPort int
|
||||
}
|
||||
|
||||
type cluster struct {
|
||||
agents []agentConfig
|
||||
Size int
|
||||
Members []*member
|
||||
}
|
||||
|
||||
type ClusterStatus struct {
|
||||
AgentStatuses map[string]client.Status
|
||||
}
|
||||
|
||||
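// bootstrap creates an agent client for every configured agent, derives each member's URLs
// from the agent host and ports, and starts all members with a shared initial-cluster
// configuration, terminating already-started members if any start fails.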
func (c *cluster) bootstrap() error {
|
||||
size := len(c.agents)
|
||||
|
||||
members := make([]*member, size)
|
||||
memberNameURLs := make([]string, size)
|
||||
for i, a := range c.agents {
|
||||
agent, err := client.NewAgent(a.endpoint)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
host, _, err := net.SplitHostPort(a.endpoint)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
members[i] = &member{
|
||||
Agent: agent,
|
||||
Endpoint: a.endpoint,
|
||||
Name: fmt.Sprintf("etcd-%d", i),
|
||||
ClientURL: fmt.Sprintf("http://%s:%d", host, a.clientPort),
|
||||
AdvertiseClientURL: fmt.Sprintf("http://%s:%d", host, a.advertiseClientPort),
|
||||
PeerURL: fmt.Sprintf("http://%s:%d", host, a.peerPort),
|
||||
AdvertisePeerURL: fmt.Sprintf("http://%s:%d", host, a.advertisePeerPort),
|
||||
FailpointURL: fmt.Sprintf("http://%s:%d", host, a.failpointPort),
|
||||
}
|
||||
memberNameURLs[i] = members[i].ClusterEntry()
|
||||
}
|
||||
clusterStr := strings.Join(memberNameURLs, ",")
|
||||
token := fmt.Sprint(rand.Int())
|
||||
|
||||
for i, m := range members {
|
||||
flags := append(
|
||||
m.Flags(),
|
||||
"--initial-cluster-token", token,
|
||||
"--initial-cluster", clusterStr,
|
||||
"--snapshot-count", "10000")
|
||||
|
||||
if _, err := m.Agent.Start(flags...); err != nil {
|
||||
// cleanup
|
||||
for _, m := range members[:i] {
|
||||
m.Agent.Terminate()
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
c.Size = size
|
||||
c.Members = members
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *cluster) Reset() error { return c.bootstrap() }
|
||||
|
||||
func (c *cluster) WaitHealth() error {
|
||||
var err error
|
||||
// wait 60s to check cluster health.
|
||||
// TODO: set it to a reasonable value. It is set that high because a
// follower may take a long time to catch up with the leader after a reboot
// under a reasonable workload (https://github.com/coreos/etcd/issues/2698).
|
||||
for i := 0; i < 60; i++ {
|
||||
for _, m := range c.Members {
|
||||
if err = m.SetHealthKeyV3(); err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
plog.Warningf("#%d setHealthKey error (%v)", i, err)
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// GetLeader returns the index of leader and error if any.
|
||||
func (c *cluster) GetLeader() (int, error) {
|
||||
for i, m := range c.Members {
|
||||
isLeader, err := m.IsLeader()
|
||||
if isLeader || err != nil {
|
||||
return i, err
|
||||
}
|
||||
}
|
||||
return 0, fmt.Errorf("no leader found")
|
||||
}
|
||||
|
||||
func (c *cluster) Cleanup() error {
|
||||
var lasterr error
|
||||
for _, m := range c.Members {
|
||||
if err := m.Agent.Cleanup(); err != nil {
|
||||
lasterr = err
|
||||
}
|
||||
}
|
||||
return lasterr
|
||||
}
|
||||
|
||||
func (c *cluster) Terminate() {
|
||||
for _, m := range c.Members {
|
||||
m.Agent.Terminate()
|
||||
}
|
||||
}
|
||||
|
||||
func (c *cluster) Status() ClusterStatus {
|
||||
cs := ClusterStatus{
|
||||
AgentStatuses: make(map[string]client.Status),
|
||||
}
|
||||
|
||||
for _, m := range c.Members {
|
||||
s, err := m.Agent.Status()
|
||||
// TODO: add a.Desc() as a key of the map
|
||||
desc := m.Endpoint
|
||||
if err != nil {
|
||||
cs.AgentStatuses[desc] = client.Status{State: "unknown"}
|
||||
plog.Printf("failed to get the status of agent [%s]", desc)
|
||||
}
|
||||
cs.AgentStatuses[desc] = s
|
||||
}
|
||||
return cs
|
||||
}
|
||||
|
||||
// maxRev returns the maximum revision found on the cluster.
|
||||
func (c *cluster) maxRev() (rev int64, err error) {
|
||||
ctx, cancel := context.WithTimeout(context.TODO(), time.Second)
|
||||
defer cancel()
|
||||
revc, errc := make(chan int64, len(c.Members)), make(chan error, len(c.Members))
|
||||
for i := range c.Members {
|
||||
go func(m *member) {
|
||||
mrev, merr := m.Rev(ctx)
|
||||
revc <- mrev
|
||||
errc <- merr
|
||||
}(c.Members[i])
|
||||
}
|
||||
for i := 0; i < len(c.Members); i++ {
|
||||
if merr := <-errc; merr != nil {
|
||||
err = merr
|
||||
}
|
||||
if mrev := <-revc; mrev > rev {
|
||||
rev = mrev
|
||||
}
|
||||
}
|
||||
return rev, err
|
||||
}
|
||||
|
||||
func (c *cluster) getRevisionHash() (map[string]int64, map[string]int64, error) {
|
||||
revs := make(map[string]int64)
|
||||
hashes := make(map[string]int64)
|
||||
for _, m := range c.Members {
|
||||
rev, hash, err := m.RevHash()
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
revs[m.ClientURL] = rev
|
||||
hashes[m.ClientURL] = hash
|
||||
}
|
||||
return revs, hashes, nil
|
||||
}
|
||||
|
||||
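// compactKV asks every member to physically compact its key-value store up to rev,
// tolerating "required revision has been compacted" responses from members after the first.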
func (c *cluster) compactKV(rev int64, timeout time.Duration) (err error) {
|
||||
if rev <= 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for i, m := range c.Members {
|
||||
u := m.ClientURL
|
||||
conn, derr := m.dialGRPC()
|
||||
if derr != nil {
|
||||
plog.Printf("[compact kv #%d] dial error %v (endpoint %s)", i, derr, u)
|
||||
err = derr
|
||||
continue
|
||||
}
|
||||
kvc := pb.NewKVClient(conn)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
plog.Printf("[compact kv #%d] starting (endpoint %s)", i, u)
|
||||
_, cerr := kvc.Compact(ctx, &pb.CompactionRequest{Revision: rev, Physical: true}, grpc.FailFast(false))
|
||||
cancel()
|
||||
conn.Close()
|
||||
succeed := true
|
||||
if cerr != nil {
|
||||
if strings.Contains(cerr.Error(), "required revision has been compacted") && i > 0 {
|
||||
plog.Printf("[compact kv #%d] already compacted (endpoint %s)", i, u)
|
||||
} else {
|
||||
plog.Warningf("[compact kv #%d] error %v (endpoint %s)", i, cerr, u)
|
||||
err = cerr
|
||||
succeed = false
|
||||
}
|
||||
}
|
||||
if succeed {
|
||||
plog.Printf("[compact kv #%d] done (endpoint %s)", i, u)
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *cluster) checkCompact(rev int64) error {
|
||||
if rev == 0 {
|
||||
return nil
|
||||
}
|
||||
for _, m := range c.Members {
|
||||
if err := m.CheckCompact(rev); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *cluster) defrag() error {
|
||||
for _, m := range c.Members {
|
||||
if err := m.Defrag(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
@ -1,16 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// etcd-tester is a single controller for all etcd-agents to manage an etcd cluster and simulate failures.
package main
@ -1,97 +0,0 @@
|
||||
// Copyright 2017 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os/exec"
|
||||
"syscall"
|
||||
|
||||
"golang.org/x/time/rate"
|
||||
)
|
||||
|
||||
type runnerStresser struct {
|
||||
cmd *exec.Cmd
|
||||
cmdStr string
|
||||
args []string
|
||||
rl *rate.Limiter
|
||||
reqRate int
|
||||
|
||||
errc chan error
|
||||
donec chan struct{}
|
||||
}
|
||||
|
||||
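// newRunnerStresser reserves reqRate from the shared rate limiter for the external runner
// command; the reserved rate is returned to the limiter in Close.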
func newRunnerStresser(cmdStr string, args []string, rl *rate.Limiter, reqRate int) *runnerStresser {
|
||||
rl.SetLimit(rl.Limit() - rate.Limit(reqRate))
|
||||
return &runnerStresser{
|
||||
cmdStr: cmdStr,
|
||||
args: args,
|
||||
rl: rl,
|
||||
reqRate: reqRate,
|
||||
errc: make(chan error, 1),
|
||||
donec: make(chan struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
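// setupOnce starts the runner command on first use and spawns a goroutine that reports
// the command's stderr as an error on errc once the process exits.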
func (rs *runnerStresser) setupOnce() (err error) {
|
||||
if rs.cmd != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
rs.cmd = exec.Command(rs.cmdStr, rs.args...)
|
||||
stderr, err := rs.cmd.StderrPipe()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
go func() {
|
||||
defer close(rs.donec)
|
||||
out, err := ioutil.ReadAll(stderr)
|
||||
if err != nil {
|
||||
rs.errc <- err
|
||||
} else {
|
||||
rs.errc <- fmt.Errorf("(%v %v) stderr %v", rs.cmdStr, rs.args, string(out))
|
||||
}
|
||||
}()
|
||||
|
||||
return rs.cmd.Start()
|
||||
}
|
||||
|
||||
func (rs *runnerStresser) Stress() (err error) {
|
||||
if err = rs.setupOnce(); err != nil {
|
||||
return err
|
||||
}
|
||||
return syscall.Kill(rs.cmd.Process.Pid, syscall.SIGCONT)
|
||||
}
|
||||
|
||||
func (rs *runnerStresser) Pause() {
|
||||
syscall.Kill(rs.cmd.Process.Pid, syscall.SIGSTOP)
|
||||
}
|
||||
|
||||
func (rs *runnerStresser) Close() {
|
||||
syscall.Kill(rs.cmd.Process.Pid, syscall.SIGINT)
|
||||
rs.cmd.Wait()
|
||||
<-rs.donec
|
||||
rs.rl.SetLimit(rs.rl.Limit() + rate.Limit(rs.reqRate))
|
||||
}
|
||||
|
||||
func (rs *runnerStresser) ModifiedKeys() int64 {
|
||||
return 1
|
||||
}
|
||||
|
||||
func (rs *runnerStresser) Checker() Checker {
|
||||
return &runnerChecker{rs.errc}
|
||||
}
|
@ -1,160 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
type failpointStats struct {
|
||||
// crashes counts the number of crashes for a failpoint
|
||||
crashes map[string]int
|
||||
// mu protects crashes
|
||||
mu sync.Mutex
|
||||
}
|
||||
|
||||
var fpStats failpointStats
|
||||
|
||||
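// failpointFailures builds one/all/majority/leader failure cases for every failpoint
// advertised by the first member, wrapping snapshot failpoints in failureUntilSnapshot
// and all others in a short delay.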
func failpointFailures(c *cluster, failpoints []string) (ret []failure, err error) {
|
||||
var fps []string
|
||||
fps, err = failpointPaths(c.Members[0].FailpointURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// create failure objects for all failpoints
|
||||
for _, fp := range fps {
|
||||
if len(fp) == 0 {
|
||||
continue
|
||||
}
|
||||
fpFails := failuresFromFailpoint(fp, failpoints)
|
||||
// wrap in delays so the failpoint has time to trigger
|
||||
for i, fpf := range fpFails {
|
||||
if strings.Contains(fp, "Snap") {
|
||||
// hack to trigger snapshot failpoints
|
||||
fpFails[i] = &failureUntilSnapshot{fpf}
|
||||
} else {
|
||||
fpFails[i] = &failureDelay{fpf, 3 * time.Second}
|
||||
}
|
||||
}
|
||||
ret = append(ret, fpFails...)
|
||||
}
|
||||
fpStats.crashes = make(map[string]int)
|
||||
return ret, err
|
||||
}
|
||||
|
||||
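// failpointPaths fetches the failpoint listing from the agent's failpoint HTTP endpoint
// and returns the failpoint names (the text before '=' on each line).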
func failpointPaths(endpoint string) ([]string, error) {
|
||||
resp, err := http.Get(endpoint)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
body, rerr := ioutil.ReadAll(resp.Body)
|
||||
if rerr != nil {
|
||||
return nil, rerr
|
||||
}
|
||||
var fps []string
|
||||
for _, l := range strings.Split(string(body), "\n") {
|
||||
fp := strings.Split(l, "=")[0]
|
||||
fps = append(fps, fp)
|
||||
}
|
||||
return fps, nil
|
||||
}
|
||||
|
||||
// failpoints follow FreeBSD KFAIL_POINT syntax,
// e.g. panic("etcd-tester"),1*sleep(1000)->panic("etcd-tester")
|
||||
func failuresFromFailpoint(fp string, failpoints []string) (fs []failure) {
|
||||
recov := makeRecoverFailpoint(fp)
|
||||
for _, failpoint := range failpoints {
|
||||
inject := makeInjectFailpoint(fp, failpoint)
|
||||
fs = append(fs, []failure{
|
||||
&failureOne{
|
||||
description: description(fmt.Sprintf("failpoint %s (one: %s)", fp, failpoint)),
|
||||
injectMember: inject,
|
||||
recoverMember: recov,
|
||||
},
|
||||
&failureAll{
|
||||
description: description(fmt.Sprintf("failpoint %s (all: %s)", fp, failpoint)),
|
||||
injectMember: inject,
|
||||
recoverMember: recov,
|
||||
},
|
||||
&failureMajority{
|
||||
description: description(fmt.Sprintf("failpoint %s (majority: %s)", fp, failpoint)),
|
||||
injectMember: inject,
|
||||
recoverMember: recov,
|
||||
},
|
||||
&failureLeader{
|
||||
failureByFunc{
|
||||
description: description(fmt.Sprintf("failpoint %s (leader: %s)", fp, failpoint)),
|
||||
injectMember: inject,
|
||||
recoverMember: recov,
|
||||
},
|
||||
0,
|
||||
},
|
||||
}...)
|
||||
}
|
||||
return fs
|
||||
}
|
||||
|
||||
func makeInjectFailpoint(fp, val string) injectMemberFunc {
|
||||
return func(m *member) (err error) {
|
||||
return putFailpoint(m.FailpointURL, fp, val)
|
||||
}
|
||||
}
|
||||
|
||||
func makeRecoverFailpoint(fp string) recoverMemberFunc {
|
||||
return func(m *member) error {
|
||||
if err := delFailpoint(m.FailpointURL, fp); err == nil {
|
||||
return nil
|
||||
}
|
||||
// node not responding, likely dead from fp panic; restart
|
||||
fpStats.mu.Lock()
|
||||
fpStats.crashes[fp]++
|
||||
fpStats.mu.Unlock()
|
||||
return recoverStop(m)
|
||||
}
|
||||
}
|
||||
|
||||
func putFailpoint(ep, fp, val string) error {
|
||||
req, _ := http.NewRequest(http.MethodPut, ep+"/"+fp, strings.NewReader(val))
|
||||
c := http.Client{}
|
||||
resp, err := c.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
resp.Body.Close()
|
||||
if resp.StatusCode/100 != 2 {
|
||||
return fmt.Errorf("failed to PUT %s=%s at %s (%v)", fp, val, ep, resp.Status)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func delFailpoint(ep, fp string) error {
|
||||
req, _ := http.NewRequest(http.MethodDelete, ep+"/"+fp, strings.NewReader(""))
|
||||
c := http.Client{}
|
||||
resp, err := c.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
resp.Body.Close()
|
||||
if resp.StatusCode/100 != 2 {
|
||||
return fmt.Errorf("failed to DELETE %s at %s (%v)", fp, ep, resp.Status)
|
||||
}
|
||||
return nil
|
||||
}
|
@ -1,205 +0,0 @@
|
||||
// Copyright 2015 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"os/exec"
|
||||
"time"
|
||||
)
|
||||
|
||||
type failure interface {
|
||||
// Inject injects the failure into the testing cluster at the given
// round. The cluster should be healthy when this is called.
|
||||
Inject(c *cluster, round int) error
|
||||
// Recover recovers the failure injected in the given round and waits
// for the testing cluster to become healthy again.
|
||||
Recover(c *cluster, round int) error
|
||||
// Desc returns a description of the failure
|
||||
Desc() string
|
||||
}
|
||||
|
||||
type description string
|
||||
|
||||
func (d description) Desc() string { return string(d) }
|
||||
|
||||
type injectMemberFunc func(*member) error
|
||||
type recoverMemberFunc func(*member) error
|
||||
|
||||
type failureByFunc struct {
|
||||
description
|
||||
injectMember injectMemberFunc
|
||||
recoverMember recoverMemberFunc
|
||||
}
|
||||
|
||||
type failureOne failureByFunc
|
||||
type failureAll failureByFunc
|
||||
type failureMajority failureByFunc
|
||||
type failureLeader struct {
|
||||
failureByFunc
|
||||
idx int
|
||||
}
|
||||
|
||||
type failureDelay struct {
|
||||
failure
|
||||
delayDuration time.Duration
|
||||
}
|
||||
|
||||
// failureUntilSnapshot injects a failure and waits for a snapshot event
|
||||
type failureUntilSnapshot struct{ failure }
|
||||
|
||||
func (f *failureOne) Inject(c *cluster, round int) error {
|
||||
return f.injectMember(c.Members[round%c.Size])
|
||||
}
|
||||
|
||||
func (f *failureOne) Recover(c *cluster, round int) error {
|
||||
if err := f.recoverMember(c.Members[round%c.Size]); err != nil {
|
||||
return err
|
||||
}
|
||||
return c.WaitHealth()
|
||||
}
|
||||
|
||||
func (f *failureAll) Inject(c *cluster, round int) error {
|
||||
for _, m := range c.Members {
|
||||
if err := f.injectMember(m); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *failureAll) Recover(c *cluster, round int) error {
|
||||
for _, m := range c.Members {
|
||||
if err := f.recoverMember(m); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return c.WaitHealth()
|
||||
}
|
||||
|
||||
func (f *failureMajority) Inject(c *cluster, round int) error {
|
||||
for i := range killMap(c.Size, round) {
|
||||
if err := f.injectMember(c.Members[i]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *failureMajority) Recover(c *cluster, round int) error {
|
||||
for i := range killMap(c.Size, round) {
|
||||
if err := f.recoverMember(c.Members[i]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *failureLeader) Inject(c *cluster, round int) error {
|
||||
idx, err := c.GetLeader()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
f.idx = idx
|
||||
return f.injectMember(c.Members[idx])
|
||||
}
|
||||
|
||||
func (f *failureLeader) Recover(c *cluster, round int) error {
|
||||
if err := f.recoverMember(c.Members[f.idx]); err != nil {
|
||||
return err
|
||||
}
|
||||
return c.WaitHealth()
|
||||
}
|
||||
|
||||
func (f *failureDelay) Inject(c *cluster, round int) error {
|
||||
if err := f.failure.Inject(c, round); err != nil {
|
||||
return err
|
||||
}
|
||||
if f.delayDuration > 0 {
|
||||
plog.Infof("sleeping delay duration %v for %q", f.delayDuration, f.failure.Desc())
|
||||
time.Sleep(f.delayDuration)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *failureUntilSnapshot) Inject(c *cluster, round int) error {
|
||||
if err := f.failure.Inject(c, round); err != nil {
|
||||
return err
|
||||
}
|
||||
if c.Size < 3 {
|
||||
return nil
|
||||
}
|
||||
// maxRev may fail since the failure was just injected; retry if it fails.
|
||||
startRev, err := c.maxRev()
|
||||
for i := 0; i < 10 && startRev == 0; i++ {
|
||||
startRev, err = c.maxRev()
|
||||
}
|
||||
if startRev == 0 {
|
||||
return err
|
||||
}
|
||||
lastRev := startRev
|
||||
// A normal healthy cluster can accept at least 1000 req/s.
// Give it three times that long to create a new snapshot.
|
||||
retry := snapshotCount / 1000 * 3
|
||||
for j := 0; j < retry; j++ {
|
||||
lastRev, _ = c.maxRev()
|
||||
// If the number of proposals committed is bigger than snapshot count,
|
||||
// a new snapshot should have been created.
|
||||
if lastRev-startRev > snapshotCount {
|
||||
return nil
|
||||
}
|
||||
time.Sleep(time.Second)
|
||||
}
|
||||
return fmt.Errorf("cluster too slow: only commit %d requests in %ds", lastRev-startRev, retry)
|
||||
}
|
||||
|
||||
func (f *failureUntilSnapshot) Desc() string {
|
||||
return f.failure.Desc() + " for a long time and expect it to recover from an incoming snapshot"
|
||||
}
|
||||
|
||||
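// killMap picks a majority of member indexes to fail, chosen pseudo-randomly but
// deterministically from the given seed (the round number).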
func killMap(size int, seed int) map[int]bool {
|
||||
m := make(map[int]bool)
|
||||
r := rand.New(rand.NewSource(int64(seed)))
|
||||
majority := size/2 + 1
|
||||
for {
|
||||
m[r.Intn(size)] = true
|
||||
if len(m) >= majority {
|
||||
return m
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type failureNop failureByFunc
|
||||
|
||||
func (f *failureNop) Inject(c *cluster, round int) error { return nil }
|
||||
func (f *failureNop) Recover(c *cluster, round int) error { return nil }
|
||||
|
||||
type failureExternal struct {
|
||||
failure
|
||||
|
||||
description string
|
||||
scriptPath string
|
||||
}
|
||||
|
||||
func (f *failureExternal) Inject(c *cluster, round int) error {
|
||||
return exec.Command(f.scriptPath, "enable", fmt.Sprintf("%d", round)).Run()
|
||||
}
|
||||
|
||||
func (f *failureExternal) Recover(c *cluster, round int) error {
|
||||
return exec.Command(f.scriptPath, "disable", fmt.Sprintf("%d", round)).Run()
|
||||
}
|
||||
|
||||
func (f *failureExternal) Desc() string { return f.description }
|
@ -1,177 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
)
|
||||
|
||||
const (
|
||||
snapshotCount = 10000
|
||||
slowNetworkLatency = 500 // 500 millisecond
|
||||
randomVariation = 50
|
||||
|
||||
// delay duration to trigger leader election (default election timeout 1s)
|
||||
triggerElectionDur = 5 * time.Second
|
||||
|
||||
// Wait longer when recovering from a slow network, because the network layer
// needs extra time to propagate the traffic control (tc command) change.
// Otherwise, we get hash values that differ from the previous revision.
// For more detail, please see https://github.com/coreos/etcd/issues/5121.
|
||||
waitRecover = 5 * time.Second
|
||||
)
|
||||
|
||||
func injectStop(m *member) error { return m.Agent.Stop() }
|
||||
func recoverStop(m *member) error {
|
||||
_, err := m.Agent.Restart()
|
||||
return err
|
||||
}
|
||||
|
||||
func newFailureKillAll() failure {
|
||||
return &failureAll{
|
||||
description: "kill all members",
|
||||
injectMember: injectStop,
|
||||
recoverMember: recoverStop,
|
||||
}
|
||||
}
|
||||
|
||||
func newFailureKillMajority() failure {
|
||||
return &failureMajority{
|
||||
description: "kill majority of the cluster",
|
||||
injectMember: injectStop,
|
||||
recoverMember: recoverStop,
|
||||
}
|
||||
}
|
||||
|
||||
func newFailureKillOne() failure {
|
||||
return &failureOne{
|
||||
description: "kill one random member",
|
||||
injectMember: injectStop,
|
||||
recoverMember: recoverStop,
|
||||
}
|
||||
}
|
||||
|
||||
func newFailureKillLeader() failure {
|
||||
ff := failureByFunc{
|
||||
description: "kill leader member",
|
||||
injectMember: injectStop,
|
||||
recoverMember: recoverStop,
|
||||
}
|
||||
return &failureLeader{ff, 0}
|
||||
}
|
||||
|
||||
func newFailureKillOneForLongTime() failure {
|
||||
return &failureUntilSnapshot{newFailureKillOne()}
|
||||
}
|
||||
|
||||
func newFailureKillLeaderForLongTime() failure {
|
||||
return &failureUntilSnapshot{newFailureKillLeader()}
|
||||
}
|
||||
|
||||
func injectDropPort(m *member) error { return m.Agent.DropPort(m.peerPort()) }
|
||||
func recoverDropPort(m *member) error { return m.Agent.RecoverPort(m.peerPort()) }
|
||||
|
||||
func newFailureIsolate() failure {
|
||||
f := &failureOne{
|
||||
description: "isolate one member",
|
||||
injectMember: injectDropPort,
|
||||
recoverMember: recoverDropPort,
|
||||
}
|
||||
return &failureDelay{
|
||||
failure: f,
|
||||
delayDuration: triggerElectionDur,
|
||||
}
|
||||
}
|
||||
|
||||
func newFailureIsolateAll() failure {
|
||||
f := &failureAll{
|
||||
description: "isolate all members",
|
||||
injectMember: injectDropPort,
|
||||
recoverMember: recoverDropPort,
|
||||
}
|
||||
return &failureDelay{
|
||||
failure: f,
|
||||
delayDuration: triggerElectionDur,
|
||||
}
|
||||
}
|
||||
|
||||
func injectLatency(m *member) error {
|
||||
if err := m.Agent.SetLatency(slowNetworkLatency, randomVariation); err != nil {
|
||||
m.Agent.RemoveLatency()
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func recoverLatency(m *member) error {
|
||||
if err := m.Agent.RemoveLatency(); err != nil {
|
||||
return err
|
||||
}
|
||||
time.Sleep(waitRecover)
|
||||
return nil
|
||||
}
|
||||
|
||||
func newFailureSlowNetworkOneMember() failure {
|
||||
desc := fmt.Sprintf("slow down one member's network by adding %d ms latency", slowNetworkLatency)
|
||||
f := &failureOne{
|
||||
description: description(desc),
|
||||
injectMember: injectLatency,
|
||||
recoverMember: recoverLatency,
|
||||
}
|
||||
return &failureDelay{
|
||||
failure: f,
|
||||
delayDuration: triggerElectionDur,
|
||||
}
|
||||
}
|
||||
|
||||
func newFailureSlowNetworkLeader() failure {
|
||||
desc := fmt.Sprintf("slow down leader's network by adding %d ms latency", slowNetworkLatency)
|
||||
ff := failureByFunc{
|
||||
description: description(desc),
|
||||
injectMember: injectLatency,
|
||||
recoverMember: recoverLatency,
|
||||
}
|
||||
f := &failureLeader{ff, 0}
|
||||
return &failureDelay{
|
||||
failure: f,
|
||||
delayDuration: triggerElectionDur,
|
||||
}
|
||||
}
|
||||
|
||||
func newFailureSlowNetworkAll() failure {
|
||||
f := &failureAll{
|
||||
description: "slow down all members' network",
|
||||
injectMember: injectLatency,
|
||||
recoverMember: recoverLatency,
|
||||
}
|
||||
return &failureDelay{
|
||||
failure: f,
|
||||
delayDuration: triggerElectionDur,
|
||||
}
|
||||
}
|
||||
|
||||
func newFailureNop() failure {
|
||||
return &failureNop{
|
||||
description: "no failure",
|
||||
}
|
||||
}
|
||||
|
||||
func newFailureExternal(scriptPath string) failure {
|
||||
return &failureExternal{
|
||||
description: fmt.Sprintf("external fault injector (script: %s)", scriptPath),
|
||||
scriptPath: scriptPath,
|
||||
}
|
||||
}
|
@ -1,44 +0,0 @@
|
||||
// Copyright 2015 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
type statusHandler struct {
|
||||
status *Status
|
||||
}
|
||||
|
||||
func (sh statusHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
en := json.NewEncoder(w)
|
||||
|
||||
sh.status.mu.Lock()
|
||||
defer sh.status.mu.Unlock()
|
||||
|
||||
if err := en.Encode(Status{
|
||||
Since: sh.status.Since,
|
||||
Failures: sh.status.Failures,
|
||||
RoundLimit: sh.status.RoundLimit,
|
||||
Cluster: sh.status.cluster.Status(),
|
||||
cluster: sh.status.cluster,
|
||||
Round: sh.status.Round,
|
||||
Case: sh.status.Case,
|
||||
}); err != nil {
|
||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
}
|
||||
}
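
main (further down) registers this handler on /status and serves it on :9028, so the current round, case, and cluster state can be inspected over plain HTTP while a test runs. A hedged sketch of polling it from Go, assuming the default local address; it needs "io", "net/http", and "os" imported:

// illustrative only: fetch and print the JSON-encoded Status
resp, err := http.Get("http://localhost:9028/status")
if err != nil {
	plog.Fatal(err)
}
defer resp.Body.Close()
io.Copy(os.Stdout, resp.Body) // exported fields: Since, Failures, RoundLimit, Cluster, Round, Case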
|
@ -1,331 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/coreos/etcd/etcdserver"
|
||||
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
|
||||
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
|
||||
|
||||
"golang.org/x/time/rate"
|
||||
"google.golang.org/grpc"
|
||||
"google.golang.org/grpc/transport"
|
||||
)
|
||||
|
||||
type keyStresser struct {
|
||||
Endpoint string
|
||||
|
||||
keyLargeSize int
|
||||
keySize int
|
||||
keySuffixRange int
|
||||
keyTxnSuffixRange int
|
||||
keyTxnOps int
|
||||
|
||||
N int
|
||||
|
||||
rateLimiter *rate.Limiter
|
||||
|
||||
wg sync.WaitGroup
|
||||
|
||||
cancel func()
|
||||
conn *grpc.ClientConn
|
||||
// atomicModifiedKeys records the number of keys created and deleted by the stresser.
|
||||
atomicModifiedKeys int64
|
||||
|
||||
stressTable *stressTable
|
||||
}
|
||||
|
||||
func (s *keyStresser) Stress() error {
|
||||
// TODO: add backoff option
|
||||
conn, err := grpc.Dial(s.Endpoint, grpc.WithInsecure())
|
||||
if err != nil {
|
||||
return fmt.Errorf("%v (%s)", err, s.Endpoint)
|
||||
}
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
s.wg.Add(s.N)
|
||||
s.conn = conn
|
||||
s.cancel = cancel
|
||||
|
||||
kvc := pb.NewKVClient(conn)
|
||||
|
||||
var stressEntries = []stressEntry{
|
||||
{weight: 0.7, f: newStressPut(kvc, s.keySuffixRange, s.keySize)},
|
||||
{
|
||||
weight: 0.7 * float32(s.keySize) / float32(s.keyLargeSize),
|
||||
f: newStressPut(kvc, s.keySuffixRange, s.keyLargeSize),
|
||||
},
|
||||
{weight: 0.07, f: newStressRange(kvc, s.keySuffixRange)},
|
||||
{weight: 0.07, f: newStressRangeInterval(kvc, s.keySuffixRange)},
|
||||
{weight: 0.07, f: newStressDelete(kvc, s.keySuffixRange)},
|
||||
{weight: 0.07, f: newStressDeleteInterval(kvc, s.keySuffixRange)},
|
||||
}
|
||||
if s.keyTxnSuffixRange > 0 {
|
||||
// adjust weights so that writes still make up roughly 70% of the workload
|
||||
stressEntries[0].weight = 0.35
|
||||
stressEntries = append(stressEntries, stressEntry{
|
||||
weight: 0.35,
|
||||
f: newStressTxn(kvc, s.keyTxnSuffixRange, s.keyTxnOps),
|
||||
})
|
||||
}
|
||||
s.stressTable = createStressTable(stressEntries)
|
||||
|
||||
for i := 0; i < s.N; i++ {
|
||||
go s.run(ctx)
|
||||
}
|
||||
|
||||
plog.Infof("keyStresser %q is started", s.Endpoint)
|
||||
return nil
|
||||
}
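
To make the weights concrete: with the default flags defined in main further below (stress-key-size=100, stress-key-large-size=32769), the large-value entry gets weight 0.7*100/32769 ≈ 0.0021, i.e. roughly one large put for every ~330 small puts, so both put paths write about the same number of bytes per unit time. When stress-key-txn-count is set, the small-put weight is halved to 0.35 and txns take the other 0.35, keeping writes at roughly 70% of the workload.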
|
||||
|
||||
func (s *keyStresser) run(ctx context.Context) {
|
||||
defer s.wg.Done()
|
||||
|
||||
for {
|
||||
if err := s.rateLimiter.Wait(ctx); err == context.Canceled {
|
||||
return
|
||||
}
|
||||
|
||||
// TODO: a 10-second timeout is enough to cover leader failure
|
||||
// and immediate leader election. Find out what other cases could
|
||||
// cause this to time out.
|
||||
sctx, scancel := context.WithTimeout(ctx, 10*time.Second)
|
||||
err, modifiedKeys := s.stressTable.choose()(sctx)
|
||||
scancel()
|
||||
if err == nil {
|
||||
atomic.AddInt64(&s.atomicModifiedKeys, modifiedKeys)
|
||||
continue
|
||||
}
|
||||
|
||||
switch rpctypes.ErrorDesc(err) {
|
||||
case context.DeadlineExceeded.Error():
|
||||
// This retries when a request is triggered at the same time as a
|
||||
// leader failure. When we terminate the leader, the request to
|
||||
// that leader cannot be processed, and times out. Also requests
|
||||
// to followers cannot be forwarded to the old leader, so they time out
|
||||
// as well. We want to keep stressing until the cluster elects a
|
||||
// new leader and starts processing requests again.
|
||||
case etcdserver.ErrTimeoutDueToLeaderFail.Error(), etcdserver.ErrTimeout.Error():
|
||||
// This retries when a request is triggered at the same time as a
|
||||
// leader failure and follower nodes receive timeout errors
|
||||
// from losing their leader. Followers should retry to connect
|
||||
// to the new leader.
|
||||
case etcdserver.ErrStopped.Error():
|
||||
// one of the etcd nodes stopped from failure injection
|
||||
case transport.ErrConnClosing.Desc:
|
||||
// server closed the transport (failure injected node)
|
||||
case rpctypes.ErrNotCapable.Error():
|
||||
// capability check has not been done (in the beginning)
|
||||
case rpctypes.ErrTooManyRequests.Error():
|
||||
// hitting the recovering member.
|
||||
case context.Canceled.Error():
|
||||
// canceled via the stresser's Close method:
|
||||
return
|
||||
case grpc.ErrClientConnClosing.Error():
|
||||
// canceled via the stresser's Close method:
|
||||
return
|
||||
default:
|
||||
plog.Errorf("keyStresser %v exited with error (%v)", s.Endpoint, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
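
The switch above is effectively a whitelist: errors that are expected while failures are being injected keep the stress loop running, anything else stops the stresser. Purely as an illustration, the same policy could be written as a lookup table:

// illustrative only: error descriptions that keep the stress loop running
var retryable = map[string]bool{
	context.DeadlineExceeded.Error():             true,
	etcdserver.ErrTimeoutDueToLeaderFail.Error(): true,
	etcdserver.ErrTimeout.Error():                true,
	etcdserver.ErrStopped.Error():                true,
	transport.ErrConnClosing.Desc:                true,
	rpctypes.ErrNotCapable.Error():               true,
	rpctypes.ErrTooManyRequests.Error():          true,
}

// the loop body would then become:
//	if retryable[rpctypes.ErrorDesc(err)] { continue }
//	return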
|
||||
|
||||
func (s *keyStresser) Pause() {
|
||||
s.Close()
|
||||
}
|
||||
|
||||
func (s *keyStresser) Close() {
|
||||
s.cancel()
|
||||
s.conn.Close()
|
||||
s.wg.Wait()
|
||||
plog.Infof("keyStresser %q is closed", s.Endpoint)
|
||||
|
||||
}
|
||||
|
||||
func (s *keyStresser) ModifiedKeys() int64 {
|
||||
return atomic.LoadInt64(&s.atomicModifiedKeys)
|
||||
}
|
||||
|
||||
func (s *keyStresser) Checker() Checker { return nil }
|
||||
|
||||
type stressFunc func(ctx context.Context) (err error, modifiedKeys int64)
|
||||
|
||||
type stressEntry struct {
|
||||
weight float32
|
||||
f stressFunc
|
||||
}
|
||||
|
||||
type stressTable struct {
|
||||
entries []stressEntry
|
||||
sumWeights float32
|
||||
}
|
||||
|
||||
func createStressTable(entries []stressEntry) *stressTable {
|
||||
st := stressTable{entries: entries}
|
||||
for _, entry := range st.entries {
|
||||
st.sumWeights += entry.weight
|
||||
}
|
||||
return &st
|
||||
}
|
||||
|
||||
func (st *stressTable) choose() stressFunc {
|
||||
v := rand.Float32() * st.sumWeights
|
||||
var sum float32
|
||||
var idx int
|
||||
for i := range st.entries {
|
||||
sum += st.entries[i].weight
|
||||
if sum >= v {
|
||||
idx = i
|
||||
break
|
||||
}
|
||||
}
|
||||
return st.entries[idx].f
|
||||
}
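
Each entry is therefore chosen with probability weight/sumWeights. A tiny, hypothetical use of the table; the two stress functions are trivial stand-ins that always succeed:

fa := func(ctx context.Context) (error, int64) { return nil, 0 }
fb := func(ctx context.Context) (error, int64) { return nil, 0 }
st := createStressTable([]stressEntry{
	{weight: 0.35, f: fa}, // chosen with probability 0.35/0.42 ≈ 83%
	{weight: 0.07, f: fb}, // chosen with probability 0.07/0.42 ≈ 17%
})
err, modified := st.choose()(context.TODO())
_, _ = err, modified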
|
||||
|
||||
func newStressPut(kvc pb.KVClient, keySuffixRange, keySize int) stressFunc {
|
||||
return func(ctx context.Context) (error, int64) {
|
||||
_, err := kvc.Put(ctx, &pb.PutRequest{
|
||||
Key: []byte(fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange))),
|
||||
Value: randBytes(keySize),
|
||||
}, grpc.FailFast(false))
|
||||
return err, 1
|
||||
}
|
||||
}
|
||||
|
||||
func newStressTxn(kvc pb.KVClient, keyTxnSuffixRange, txnOps int) stressFunc {
|
||||
keys := make([]string, keyTxnSuffixRange)
|
||||
for i := range keys {
|
||||
keys[i] = fmt.Sprintf("/k%03d", i)
|
||||
}
|
||||
return writeTxn(kvc, keys, txnOps)
|
||||
}
|
||||
|
||||
func writeTxn(kvc pb.KVClient, keys []string, txnOps int) stressFunc {
|
||||
return func(ctx context.Context) (error, int64) {
|
||||
ks := make(map[string]struct{}, txnOps)
|
||||
for len(ks) != txnOps {
|
||||
ks[keys[rand.Intn(len(keys))]] = struct{}{}
|
||||
}
|
||||
selected := make([]string, 0, txnOps)
|
||||
for k := range ks {
|
||||
selected = append(selected, k)
|
||||
}
|
||||
com, delOp, putOp := getTxnReqs(selected[0], "bar00")
|
||||
txnReq := &pb.TxnRequest{
|
||||
Compare: []*pb.Compare{com},
|
||||
Success: []*pb.RequestOp{delOp},
|
||||
Failure: []*pb.RequestOp{putOp},
|
||||
}
|
||||
|
||||
// add nested txns if any
|
||||
for i := 1; i < txnOps; i++ {
|
||||
k, v := selected[i], fmt.Sprintf("bar%02d", i)
|
||||
com, delOp, putOp = getTxnReqs(k, v)
|
||||
nested := &pb.RequestOp{
|
||||
Request: &pb.RequestOp_RequestTxn{
|
||||
RequestTxn: &pb.TxnRequest{
|
||||
Compare: []*pb.Compare{com},
|
||||
Success: []*pb.RequestOp{delOp},
|
||||
Failure: []*pb.RequestOp{putOp},
|
||||
},
|
||||
},
|
||||
}
|
||||
txnReq.Success = append(txnReq.Success, nested)
|
||||
txnReq.Failure = append(txnReq.Failure, nested)
|
||||
}
|
||||
|
||||
_, err := kvc.Txn(ctx, txnReq, grpc.FailFast(false))
|
||||
return err, int64(txnOps)
|
||||
}
|
||||
}
|
||||
|
||||
func getTxnReqs(key, val string) (com *pb.Compare, delOp *pb.RequestOp, putOp *pb.RequestOp) {
|
||||
// if key exists (version > 0)
|
||||
com = &pb.Compare{
|
||||
Key: []byte(key),
|
||||
Target: pb.Compare_VERSION,
|
||||
Result: pb.Compare_GREATER,
|
||||
TargetUnion: &pb.Compare_Version{Version: 0},
|
||||
}
|
||||
delOp = &pb.RequestOp{
|
||||
Request: &pb.RequestOp_RequestDeleteRange{
|
||||
RequestDeleteRange: &pb.DeleteRangeRequest{
|
||||
Key: []byte(key),
|
||||
},
|
||||
},
|
||||
}
|
||||
putOp = &pb.RequestOp{
|
||||
Request: &pb.RequestOp_RequestPut{
|
||||
RequestPut: &pb.PutRequest{
|
||||
Key: []byte(key),
|
||||
Value: []byte(val),
|
||||
},
|
||||
},
|
||||
}
|
||||
return com, delOp, putOp
|
||||
}
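
For comparison, the same shape (delete the key if it already exists, i.e. version > 0, otherwise create it) could be expressed with the higher-level clientv3 transaction API. This is only a rough sketch, assuming cli is an existing *clientv3.Client and key/val are placeholder values:

// if the key exists (version > 0), delete it; otherwise put it
_, err := cli.Txn(ctx).
	If(clientv3.Compare(clientv3.Version(key), ">", 0)).
	Then(clientv3.OpDelete(key)).
	Else(clientv3.OpPut(key, val)).
	Commit()
if err != nil {
	// handle the error as the raw-KV path above does
}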
|
||||
|
||||
func newStressRange(kvc pb.KVClient, keySuffixRange int) stressFunc {
|
||||
return func(ctx context.Context) (error, int64) {
|
||||
_, err := kvc.Range(ctx, &pb.RangeRequest{
|
||||
Key: []byte(fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange))),
|
||||
}, grpc.FailFast(false))
|
||||
return err, 0
|
||||
}
|
||||
}
|
||||
|
||||
func newStressRangeInterval(kvc pb.KVClient, keySuffixRange int) stressFunc {
|
||||
return func(ctx context.Context) (error, int64) {
|
||||
start := rand.Intn(keySuffixRange)
|
||||
end := start + 500
|
||||
_, err := kvc.Range(ctx, &pb.RangeRequest{
|
||||
Key: []byte(fmt.Sprintf("foo%016x", start)),
|
||||
RangeEnd: []byte(fmt.Sprintf("foo%016x", end)),
|
||||
}, grpc.FailFast(false))
|
||||
return err, 0
|
||||
}
|
||||
}
|
||||
|
||||
func newStressDelete(kvc pb.KVClient, keySuffixRange int) stressFunc {
|
||||
return func(ctx context.Context) (error, int64) {
|
||||
_, err := kvc.DeleteRange(ctx, &pb.DeleteRangeRequest{
|
||||
Key: []byte(fmt.Sprintf("foo%016x", rand.Intn(keySuffixRange))),
|
||||
}, grpc.FailFast(false))
|
||||
return err, 1
|
||||
}
|
||||
}
|
||||
|
||||
func newStressDeleteInterval(kvc pb.KVClient, keySuffixRange int) stressFunc {
|
||||
return func(ctx context.Context) (error, int64) {
|
||||
start := rand.Intn(keySuffixRange)
|
||||
end := start + 500
|
||||
resp, err := kvc.DeleteRange(ctx, &pb.DeleteRangeRequest{
|
||||
Key: []byte(fmt.Sprintf("foo%016x", start)),
|
||||
RangeEnd: []byte(fmt.Sprintf("foo%016x", end)),
|
||||
}, grpc.FailFast(false))
|
||||
if err == nil {
|
||||
return nil, resp.Deleted
|
||||
}
|
||||
return err, 0
|
||||
}
|
||||
}
|
@ -1,382 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
|
||||
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
|
||||
|
||||
"golang.org/x/time/rate"
|
||||
"google.golang.org/grpc"
|
||||
)
|
||||
|
||||
const (
|
||||
// time to live for lease
|
||||
TTL = 120
|
||||
TTLShort = 2
|
||||
)
|
||||
|
||||
type leaseStresser struct {
|
||||
endpoint string
|
||||
cancel func()
|
||||
conn *grpc.ClientConn
|
||||
kvc pb.KVClient
|
||||
lc pb.LeaseClient
|
||||
ctx context.Context
|
||||
|
||||
rateLimiter *rate.Limiter
|
||||
// atomicModifiedKey records the number of keys created and deleted during a test case
|
||||
atomicModifiedKey int64
|
||||
numLeases int
|
||||
keysPerLease int
|
||||
|
||||
aliveLeases *atomicLeases
|
||||
revokedLeases *atomicLeases
|
||||
shortLivedLeases *atomicLeases
|
||||
|
||||
runWg sync.WaitGroup
|
||||
aliveWg sync.WaitGroup
|
||||
}
|
||||
|
||||
type atomicLeases struct {
|
||||
// rwLock is used to protect read/write access to the leases map,
|
||||
// which is accessed and modified by different goroutines.
|
||||
rwLock sync.RWMutex
|
||||
leases map[int64]time.Time
|
||||
}
|
||||
|
||||
func (al *atomicLeases) add(leaseID int64, t time.Time) {
|
||||
al.rwLock.Lock()
|
||||
al.leases[leaseID] = t
|
||||
al.rwLock.Unlock()
|
||||
}
|
||||
|
||||
func (al *atomicLeases) update(leaseID int64, t time.Time) {
|
||||
al.rwLock.Lock()
|
||||
_, ok := al.leases[leaseID]
|
||||
if ok {
|
||||
al.leases[leaseID] = t
|
||||
}
|
||||
al.rwLock.Unlock()
|
||||
}
|
||||
|
||||
func (al *atomicLeases) read(leaseID int64) (rv time.Time, ok bool) {
|
||||
al.rwLock.RLock()
|
||||
rv, ok = al.leases[leaseID]
|
||||
al.rwLock.RUnlock()
|
||||
return rv, ok
|
||||
}
|
||||
|
||||
func (al *atomicLeases) remove(leaseID int64) {
|
||||
al.rwLock.Lock()
|
||||
delete(al.leases, leaseID)
|
||||
al.rwLock.Unlock()
|
||||
}
|
||||
|
||||
func (al *atomicLeases) getLeasesMap() map[int64]time.Time {
|
||||
leasesCopy := make(map[int64]time.Time)
|
||||
al.rwLock.RLock()
|
||||
for k, v := range al.leases {
|
||||
leasesCopy[k] = v
|
||||
}
|
||||
al.rwLock.RUnlock()
|
||||
return leasesCopy
|
||||
}
|
||||
|
||||
func (ls *leaseStresser) setupOnce() error {
|
||||
if ls.aliveLeases != nil {
|
||||
return nil
|
||||
}
|
||||
if ls.numLeases == 0 {
|
||||
panic("expect numLeases to be set")
|
||||
}
|
||||
if ls.keysPerLease == 0 {
|
||||
panic("expect keysPerLease to be set")
|
||||
}
|
||||
|
||||
ls.aliveLeases = &atomicLeases{leases: make(map[int64]time.Time)}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ls *leaseStresser) Stress() error {
|
||||
plog.Infof("lease Stresser %v starting ...", ls.endpoint)
|
||||
if err := ls.setupOnce(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
conn, err := grpc.Dial(ls.endpoint, grpc.WithInsecure(), grpc.WithBackoffMaxDelay(1*time.Second))
|
||||
if err != nil {
|
||||
return fmt.Errorf("%v (%s)", err, ls.endpoint)
|
||||
}
|
||||
ls.conn = conn
|
||||
ls.kvc = pb.NewKVClient(conn)
|
||||
ls.lc = pb.NewLeaseClient(conn)
|
||||
ls.revokedLeases = &atomicLeases{leases: make(map[int64]time.Time)}
|
||||
ls.shortLivedLeases = &atomicLeases{leases: make(map[int64]time.Time)}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
ls.cancel = cancel
|
||||
ls.ctx = ctx
|
||||
|
||||
ls.runWg.Add(1)
|
||||
go ls.run()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ls *leaseStresser) run() {
|
||||
defer ls.runWg.Done()
|
||||
ls.restartKeepAlives()
|
||||
for {
|
||||
// the number of keys created and deleted is roughly 2x the number of created keys for an iteration.
|
||||
// the rateLimiter therefore consumes 2x ls.numLeases*ls.keysPerLease tokens, where each token represents one create or delete operation on a key.
|
||||
err := ls.rateLimiter.WaitN(ls.ctx, 2*ls.numLeases*ls.keysPerLease)
|
||||
if err == context.Canceled {
|
||||
return
|
||||
}
|
||||
plog.Debugf("creating lease on %v", ls.endpoint)
|
||||
ls.createLeases()
|
||||
plog.Debugf("done creating lease on %v", ls.endpoint)
|
||||
plog.Debugf("dropping lease on %v", ls.endpoint)
|
||||
ls.randomlyDropLeases()
|
||||
plog.Debugf("done dropping lease on %v", ls.endpoint)
|
||||
}
|
||||
}
|
||||
|
||||
func (ls *leaseStresser) restartKeepAlives() {
|
||||
for leaseID := range ls.aliveLeases.getLeasesMap() {
|
||||
ls.aliveWg.Add(1)
|
||||
go func(id int64) {
|
||||
ls.keepLeaseAlive(id)
|
||||
}(leaseID)
|
||||
}
|
||||
}
|
||||
|
||||
func (ls *leaseStresser) createLeases() {
|
||||
ls.createAliveLeases()
|
||||
ls.createShortLivedLeases()
|
||||
}
|
||||
|
||||
func (ls *leaseStresser) createAliveLeases() {
|
||||
neededLeases := ls.numLeases - len(ls.aliveLeases.getLeasesMap())
|
||||
var wg sync.WaitGroup
|
||||
for i := 0; i < neededLeases; i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
leaseID, err := ls.createLeaseWithKeys(TTL)
|
||||
if err != nil {
|
||||
plog.Debugf("lease creation error: (%v)", err)
|
||||
return
|
||||
}
|
||||
ls.aliveLeases.add(leaseID, time.Now())
|
||||
// keep track of all the keepLeaseAlive goroutines
|
||||
ls.aliveWg.Add(1)
|
||||
go ls.keepLeaseAlive(leaseID)
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func (ls *leaseStresser) createShortLivedLeases() {
|
||||
// one round of createLeases() might not create all the short-lived leases we want due to failures.
|
||||
// thus, we create the remaining short-lived leases in future rounds.
|
||||
neededLeases := ls.numLeases - len(ls.shortLivedLeases.getLeasesMap())
|
||||
var wg sync.WaitGroup
|
||||
for i := 0; i < neededLeases; i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
leaseID, err := ls.createLeaseWithKeys(TTLShort)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
ls.shortLivedLeases.add(leaseID, time.Now())
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func (ls *leaseStresser) createLeaseWithKeys(ttl int64) (int64, error) {
|
||||
leaseID, err := ls.createLease(ttl)
|
||||
if err != nil {
|
||||
plog.Debugf("lease creation error: (%v)", err)
|
||||
return -1, err
|
||||
}
|
||||
plog.Debugf("lease %v created ", leaseID)
|
||||
if err := ls.attachKeysWithLease(leaseID); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
return leaseID, nil
|
||||
}
|
||||
|
||||
func (ls *leaseStresser) randomlyDropLeases() {
|
||||
var wg sync.WaitGroup
|
||||
for l := range ls.aliveLeases.getLeasesMap() {
|
||||
wg.Add(1)
|
||||
go func(leaseID int64) {
|
||||
defer wg.Done()
|
||||
dropped, err := ls.randomlyDropLease(leaseID)
|
||||
// if randomlyDropLease encountered an error, such as a canceled context, remove the lease from aliveLeases
|
||||
// because we can't tell whether the lease is dropped or not.
|
||||
if err != nil {
|
||||
plog.Debugf("drop lease %v has failed error (%v)", leaseID, err)
|
||||
ls.aliveLeases.remove(leaseID)
|
||||
return
|
||||
}
|
||||
if !dropped {
|
||||
return
|
||||
}
|
||||
plog.Debugf("lease %v dropped", leaseID)
|
||||
ls.revokedLeases.add(leaseID, time.Now())
|
||||
ls.aliveLeases.remove(leaseID)
|
||||
}(l)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func (ls *leaseStresser) createLease(ttl int64) (int64, error) {
|
||||
resp, err := ls.lc.LeaseGrant(ls.ctx, &pb.LeaseGrantRequest{TTL: ttl})
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
return resp.ID, nil
|
||||
}
|
||||
|
||||
func (ls *leaseStresser) keepLeaseAlive(leaseID int64) {
|
||||
defer ls.aliveWg.Done()
|
||||
ctx, cancel := context.WithCancel(ls.ctx)
|
||||
stream, err := ls.lc.LeaseKeepAlive(ctx)
|
||||
defer func() { cancel() }()
|
||||
for {
|
||||
select {
|
||||
case <-time.After(500 * time.Millisecond):
|
||||
case <-ls.ctx.Done():
|
||||
plog.Debugf("keepLeaseAlive lease %v context canceled ", leaseID)
|
||||
// it is possible that a lease expires during the invariant-checking phase but not during the keepLeaseAlive() phase.
|
||||
// this scenario happens when an alive lease is just about to expire as keepLeaseAlive() exits and then expires during invariant checking.
|
||||
// to circumvent that scenario, we check each lease before the keepalive loop exits to see if it has been renewed within the last TTL/2 duration.
|
||||
// if it has been renewed, the invariant checker has at least TTL/2 (60s with TTL=120) before the lease expires, which is long enough for the check to finish.
|
||||
// if it has not been renewed, we remove the lease from the alive map so that it doesn't expire during invariant checking.
|
||||
renewTime, ok := ls.aliveLeases.read(leaseID)
|
||||
if ok && renewTime.Add(TTL/2*time.Second).Before(time.Now()) {
|
||||
ls.aliveLeases.remove(leaseID)
|
||||
plog.Debugf("keepLeaseAlive lease %v has not been renewed. drop it.", leaseID)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
plog.Debugf("keepLeaseAlive lease %v creates stream error: (%v)", leaseID, err)
|
||||
cancel()
|
||||
ctx, cancel = context.WithCancel(ls.ctx)
|
||||
stream, err = ls.lc.LeaseKeepAlive(ctx)
|
||||
|
||||
continue
|
||||
}
|
||||
err = stream.Send(&pb.LeaseKeepAliveRequest{ID: leaseID})
|
||||
plog.Debugf("keepLeaseAlive stream sends lease %v keepalive request", leaseID)
|
||||
if err != nil {
|
||||
plog.Debugf("keepLeaseAlive stream sends lease %v error (%v)", leaseID, err)
|
||||
continue
|
||||
}
|
||||
leaseRenewTime := time.Now()
|
||||
plog.Debugf("keepLeaseAlive stream sends lease %v keepalive request succeed", leaseID)
|
||||
respRC, err := stream.Recv()
|
||||
if err != nil {
|
||||
plog.Debugf("keepLeaseAlive stream receives lease %v stream error (%v)", leaseID, err)
|
||||
continue
|
||||
}
|
||||
// lease expires after TTL becomes 0
|
||||
// don't send keepalive if the lease has expired
|
||||
if respRC.TTL <= 0 {
|
||||
plog.Debugf("keepLeaseAlive stream receives lease %v has TTL <= 0", leaseID)
|
||||
ls.aliveLeases.remove(leaseID)
|
||||
return
|
||||
}
|
||||
// renew lease timestamp only if lease is present
|
||||
plog.Debugf("keepLeaseAlive renew lease %v", leaseID)
|
||||
ls.aliveLeases.update(leaseID, leaseRenewTime)
|
||||
}
|
||||
}
|
||||
|
||||
// attachKeysWithLease function attaches keys to the lease.
|
||||
// the key format is the concatenation of leaseID + '_' + '<order of key creation>',
|
||||
// e.g. 5186835655248304152_0 for the first created key and 5186835655248304152_1 for the second.
|
||||
func (ls *leaseStresser) attachKeysWithLease(leaseID int64) error {
|
||||
var txnPuts []*pb.RequestOp
|
||||
for j := 0; j < ls.keysPerLease; j++ {
|
||||
txnput := &pb.RequestOp{Request: &pb.RequestOp_RequestPut{RequestPut: &pb.PutRequest{Key: []byte(fmt.Sprintf("%d%s%d", leaseID, "_", j)),
|
||||
Value: []byte(fmt.Sprintf("bar")), Lease: leaseID}}}
|
||||
txnPuts = append(txnPuts, txnput)
|
||||
}
|
||||
// keep retrying until lease is not found or ctx is being canceled
|
||||
for ls.ctx.Err() == nil {
|
||||
txn := &pb.TxnRequest{Success: txnPuts}
|
||||
_, err := ls.kvc.Txn(ls.ctx, txn)
|
||||
if err == nil {
|
||||
// since all created keys will be deleted too, the number of operations on keys will be roughly 2x the number of created keys
|
||||
atomic.AddInt64(&ls.atomicModifiedKey, 2*int64(ls.keysPerLease))
|
||||
return nil
|
||||
}
|
||||
if rpctypes.Error(err) == rpctypes.ErrLeaseNotFound {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return ls.ctx.Err()
|
||||
}
|
||||
|
||||
// randomlyDropLease drops the lease only when rand.Intn(2) returns 0.
|
||||
// This gives a 50% chance of dropping a lease.
|
||||
func (ls *leaseStresser) randomlyDropLease(leaseID int64) (bool, error) {
|
||||
if rand.Intn(2) != 0 {
|
||||
return false, nil
|
||||
}
|
||||
// keep retrying until a lease is dropped or ctx is being canceled
|
||||
for ls.ctx.Err() == nil {
|
||||
_, err := ls.lc.LeaseRevoke(ls.ctx, &pb.LeaseRevokeRequest{ID: leaseID})
|
||||
if err == nil || rpctypes.Error(err) == rpctypes.ErrLeaseNotFound {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
plog.Debugf("randomlyDropLease error: (%v)", ls.ctx.Err())
|
||||
return false, ls.ctx.Err()
|
||||
}
|
||||
|
||||
func (ls *leaseStresser) Pause() {
|
||||
ls.Close()
|
||||
}
|
||||
|
||||
func (ls *leaseStresser) Close() {
|
||||
plog.Debugf("lease stresser %q is closing...", ls.endpoint)
|
||||
ls.cancel()
|
||||
ls.runWg.Wait()
|
||||
ls.aliveWg.Wait()
|
||||
ls.conn.Close()
|
||||
plog.Infof("lease stresser %q is closed", ls.endpoint)
|
||||
}
|
||||
|
||||
func (ls *leaseStresser) ModifiedKeys() int64 {
|
||||
return atomic.LoadInt64(&ls.atomicModifiedKey)
|
||||
}
|
||||
|
||||
func (ls *leaseStresser) Checker() Checker { return &leaseChecker{endpoint: ls.endpoint, ls: ls} }
|
@ -1,232 +0,0 @@
|
||||
// Copyright 2015 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/coreos/etcd/pkg/debugutil"
|
||||
|
||||
"github.com/coreos/pkg/capnslog"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"golang.org/x/time/rate"
|
||||
"google.golang.org/grpc/grpclog"
|
||||
)
|
||||
|
||||
var plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "etcd-tester")
|
||||
|
||||
const (
|
||||
defaultClientPort = 2379
|
||||
defaultPeerPort = 2380
|
||||
defaultFailpointPort = 2381
|
||||
)
|
||||
|
||||
func main() {
|
||||
endpointStr := flag.String("agent-endpoints", "localhost:9027", "HTTP RPC endpoints of agents. Do not specify the schema.")
|
||||
clientPorts := flag.String("client-ports", "", "etcd client port for each agent endpoint")
|
||||
advertiseClientPorts := flag.String("advertise-client-ports", "", "etcd advertise client port for each agent endpoint")
|
||||
peerPorts := flag.String("peer-ports", "", "etcd peer port for each agent endpoint")
|
||||
advertisePeerPorts := flag.String("advertise-peer-ports", "", "etcd advertise peer port for each agent endpoint")
|
||||
failpointPorts := flag.String("failpoint-ports", "", "etcd failpoint port for each agent endpoint")
|
||||
|
||||
stressKeyLargeSize := flag.Uint("stress-key-large-size", 32*1024+1, "the size of each large key written into etcd.")
|
||||
stressKeySize := flag.Uint("stress-key-size", 100, "the size of each small key written into etcd.")
|
||||
stressKeySuffixRange := flag.Uint("stress-key-count", 250000, "the count of key range written into etcd.")
|
||||
stressKeyTxnSuffixRange := flag.Uint("stress-key-txn-count", 100, "the count of key range written into etcd txn (max 100).")
|
||||
stressKeyTxnOps := flag.Uint("stress-key-txn-ops", 1, "number of operations per a transaction (max 64).")
|
||||
limit := flag.Int("limit", -1, "the limit of rounds to run failure set (-1 to run without limits).")
|
||||
exitOnFailure := flag.Bool("exit-on-failure", false, "exit tester on first failure")
|
||||
stressQPS := flag.Int("stress-qps", 10000, "maximum number of stresser requests per second.")
|
||||
schedCases := flag.String("schedule-cases", "", "test case schedule")
|
||||
consistencyCheck := flag.Bool("consistency-check", true, "true to check consistency (revision, hash)")
|
||||
stresserType := flag.String("stresser", "keys,lease", "comma separated list of stressers (keys, lease, v2keys, nop, election-runner, watch-runner, lock-racer-runner, lease-runner).")
|
||||
etcdRunnerPath := flag.String("etcd-runner", "", "specify a path of etcd runner binary")
|
||||
failureTypes := flag.String("failures", "default,failpoints", "specify failures (concat of \"default\" and \"failpoints\").")
|
||||
failpoints := flag.String("failpoints", `panic("etcd-tester")`, `comma separated list of failpoint terms to inject (e.g. 'panic("etcd-tester"),1*sleep(1000)')`)
|
||||
externalFailures := flag.String("external-failures", "", "specify a path of script for enabling/disabling an external fault injector")
|
||||
enablePprof := flag.Bool("enable-pprof", false, "true to enable pprof")
|
||||
flag.Parse()
|
||||
|
||||
// to discard gRPC-side balancer logs
|
||||
grpclog.SetLoggerV2(grpclog.NewLoggerV2(ioutil.Discard, ioutil.Discard, ioutil.Discard))
|
||||
|
||||
eps := strings.Split(*endpointStr, ",")
|
||||
cports := portsFromArg(*clientPorts, len(eps), defaultClientPort)
|
||||
acports := portsFromArg(*advertiseClientPorts, len(eps), defaultClientPort)
|
||||
pports := portsFromArg(*peerPorts, len(eps), defaultPeerPort)
|
||||
apports := portsFromArg(*advertisePeerPorts, len(eps), defaultPeerPort)
|
||||
fports := portsFromArg(*failpointPorts, len(eps), defaultFailpointPort)
|
||||
agents := make([]agentConfig, len(eps))
|
||||
|
||||
for i := range eps {
|
||||
agents[i].endpoint = eps[i]
|
||||
agents[i].clientPort = cports[i]
|
||||
agents[i].advertiseClientPort = acports[i]
|
||||
agents[i].peerPort = pports[i]
|
||||
agents[i].advertisePeerPort = apports[i]
|
||||
agents[i].failpointPort = fports[i]
|
||||
}
|
||||
|
||||
c := &cluster{agents: agents}
|
||||
if err := c.bootstrap(); err != nil {
|
||||
plog.Fatal(err)
|
||||
}
|
||||
defer c.Terminate()
|
||||
|
||||
// ensure cluster is fully booted to know failpoints are available
|
||||
c.WaitHealth()
|
||||
|
||||
var failures []failure
|
||||
|
||||
if failureTypes != nil && *failureTypes != "" {
|
||||
types, failpoints := strings.Split(*failureTypes, ","), strings.Split(*failpoints, ",")
|
||||
failures = makeFailures(types, failpoints, c)
|
||||
}
|
||||
|
||||
if externalFailures != nil && *externalFailures != "" {
|
||||
if len(failures) != 0 {
|
||||
plog.Errorf("specify only one of -failures or -external-failures")
|
||||
os.Exit(1)
|
||||
}
|
||||
failures = append(failures, newFailureExternal(*externalFailures))
|
||||
}
|
||||
|
||||
if len(failures) == 0 {
|
||||
plog.Infof("no failures\n")
|
||||
failures = append(failures, newFailureNop())
|
||||
}
|
||||
|
||||
schedule := failures
|
||||
if schedCases != nil && *schedCases != "" {
|
||||
cases := strings.Split(*schedCases, " ")
|
||||
schedule = make([]failure, len(cases))
|
||||
for i := range cases {
|
||||
caseNum := 0
|
||||
n, err := fmt.Sscanf(cases[i], "%d", &caseNum)
|
||||
if n == 0 || err != nil {
|
||||
plog.Fatalf(`couldn't parse case "%s" (%v)`, cases[i], err)
|
||||
}
|
||||
schedule[i] = failures[caseNum]
|
||||
}
|
||||
}
|
||||
|
||||
scfg := stressConfig{
|
||||
rateLimiter: rate.NewLimiter(rate.Limit(*stressQPS), *stressQPS),
|
||||
keyLargeSize: int(*stressKeyLargeSize),
|
||||
keySize: int(*stressKeySize),
|
||||
keySuffixRange: int(*stressKeySuffixRange),
|
||||
keyTxnSuffixRange: int(*stressKeyTxnSuffixRange),
|
||||
keyTxnOps: int(*stressKeyTxnOps),
|
||||
numLeases: 10,
|
||||
keysPerLease: 10,
|
||||
|
||||
etcdRunnerPath: *etcdRunnerPath,
|
||||
}
|
||||
if scfg.keyTxnSuffixRange > 100 {
|
||||
plog.Fatalf("stress-key-txn-count is maximum 100, got %d", scfg.keyTxnSuffixRange)
|
||||
}
|
||||
if scfg.keyTxnOps > 64 {
|
||||
plog.Fatalf("stress-key-txn-ops is maximum 64, got %d", scfg.keyTxnOps)
|
||||
}
|
||||
|
||||
t := &tester{
|
||||
failures: schedule,
|
||||
cluster: c,
|
||||
limit: *limit,
|
||||
exitOnFailure: *exitOnFailure,
|
||||
|
||||
scfg: scfg,
|
||||
stresserType: *stresserType,
|
||||
doChecks: *consistencyCheck,
|
||||
}
|
||||
|
||||
sh := statusHandler{status: &t.status}
|
||||
http.Handle("/status", sh)
|
||||
http.Handle("/metrics", promhttp.Handler())
|
||||
|
||||
if *enablePprof {
|
||||
for p, h := range debugutil.PProfHandlers() {
|
||||
http.Handle(p, h)
|
||||
}
|
||||
}
|
||||
|
||||
go func() { plog.Fatal(http.ListenAndServe(":9028", nil)) }()
|
||||
|
||||
t.runLoop()
|
||||
}
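
For reference, an illustrative invocation against three agents (the endpoints and flag choices here are examples only) would be: etcd-tester --agent-endpoints "10.0.0.1:9027,10.0.0.2:9027,10.0.0.3:9027" --stresser keys,lease --failures default,failpoints --limit 3 --exit-on-failure; the per-agent port flags may be left empty, in which case every agent falls back to the defaults (2379/2380/2381).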
|
||||
|
||||
// portsFromArg converts a comma separated list into a slice of ints
|
||||
func portsFromArg(arg string, n, defaultPort int) []int {
|
||||
ret := make([]int, n)
|
||||
if len(arg) == 0 {
|
||||
for i := range ret {
|
||||
ret[i] = defaultPort
|
||||
}
|
||||
return ret
|
||||
}
|
||||
s := strings.Split(arg, ",")
|
||||
if len(s) != n {
|
||||
fmt.Printf("expected %d ports, got %d (%s)\n", n, len(s), arg)
|
||||
os.Exit(1)
|
||||
}
|
||||
for i := range s {
|
||||
if _, err := fmt.Sscanf(s[i], "%d", &ret[i]); err != nil {
|
||||
fmt.Println(err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
return ret
|
||||
}
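
A quick illustration of the two paths above (values are examples only):

fmt.Println(portsFromArg("2379,12379,22379", 3, defaultClientPort)) // [2379 12379 22379]
fmt.Println(portsFromArg("", 3, defaultClientPort))                 // [2379 2379 2379]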
|
||||
|
||||
func makeFailures(types, failpoints []string, c *cluster) []failure {
|
||||
var failures []failure
|
||||
for i := range types {
|
||||
switch types[i] {
|
||||
case "default":
|
||||
defaultFailures := []failure{
|
||||
newFailureKillAll(),
|
||||
newFailureKillMajority(),
|
||||
newFailureKillOne(),
|
||||
newFailureKillLeader(),
|
||||
newFailureKillOneForLongTime(),
|
||||
newFailureKillLeaderForLongTime(),
|
||||
newFailureIsolate(),
|
||||
newFailureIsolateAll(),
|
||||
newFailureSlowNetworkOneMember(),
|
||||
newFailureSlowNetworkLeader(),
|
||||
newFailureSlowNetworkAll(),
|
||||
}
|
||||
failures = append(failures, defaultFailures...)
|
||||
|
||||
case "failpoints":
|
||||
fpFailures, fperr := failpointFailures(c, failpoints)
|
||||
if len(fpFailures) == 0 {
|
||||
plog.Infof("no failpoints found (%v)", fperr)
|
||||
}
|
||||
failures = append(failures, fpFailures...)
|
||||
|
||||
default:
|
||||
plog.Errorf("unknown failure: %s\n", types[i])
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
return failures
|
||||
}
|
@ -1,188 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/url"
|
||||
"time"
|
||||
|
||||
"github.com/coreos/etcd/clientv3"
|
||||
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
|
||||
"github.com/coreos/etcd/tools/functional-tester/etcd-agent/client"
|
||||
|
||||
"google.golang.org/grpc"
|
||||
)
|
||||
|
||||
type member struct {
|
||||
Agent client.Agent
|
||||
Endpoint string
|
||||
Name string
|
||||
ClientURL string
|
||||
AdvertiseClientURL string
|
||||
PeerURL string
|
||||
AdvertisePeerURL string
|
||||
FailpointURL string
|
||||
}
|
||||
|
||||
func (m *member) ClusterEntry() string { return m.Name + "=" + m.AdvertisePeerURL }
|
||||
|
||||
func (m *member) Flags() []string {
|
||||
return []string{
|
||||
"--name", m.Name,
|
||||
"--listen-client-urls", m.ClientURL,
|
||||
"--advertise-client-urls", m.AdvertiseClientURL,
|
||||
"--listen-peer-urls", m.PeerURL,
|
||||
"--initial-advertise-peer-urls", m.AdvertisePeerURL,
|
||||
"--initial-cluster-state", "new",
|
||||
"--experimental-initial-corrupt-check",
|
||||
}
|
||||
}
|
||||
|
||||
func (m *member) CheckCompact(rev int64) error {
|
||||
cli, err := m.newClientV3()
|
||||
if err != nil {
|
||||
return fmt.Errorf("%v (endpoint %s)", err, m.AdvertiseClientURL)
|
||||
}
|
||||
defer cli.Close()
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
wch := cli.Watch(ctx, "\x00", clientv3.WithFromKey(), clientv3.WithRev(rev-1))
|
||||
wr, ok := <-wch
|
||||
cancel()
|
||||
|
||||
if !ok {
|
||||
return fmt.Errorf("watch channel terminated (endpoint %s)", m.AdvertiseClientURL)
|
||||
}
|
||||
if wr.CompactRevision != rev {
|
||||
return fmt.Errorf("got compact revision %v, wanted %v (endpoint %s)", wr.CompactRevision, rev, m.AdvertiseClientURL)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *member) Defrag() error {
|
||||
plog.Printf("defragmenting %s\n", m.AdvertiseClientURL)
|
||||
cli, err := m.newClientV3()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer cli.Close()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
|
||||
_, err = cli.Defragment(ctx, m.AdvertiseClientURL)
|
||||
cancel()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
plog.Printf("defragmented %s\n", m.AdvertiseClientURL)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *member) RevHash() (int64, int64, error) {
|
||||
conn, err := m.dialGRPC()
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
mt := pb.NewMaintenanceClient(conn)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
resp, err := mt.Hash(ctx, &pb.HashRequest{}, grpc.FailFast(false))
|
||||
cancel()
|
||||
conn.Close()
|
||||
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
return resp.Header.Revision, int64(resp.Hash), nil
|
||||
}
|
||||
|
||||
func (m *member) Rev(ctx context.Context) (int64, error) {
|
||||
cli, err := m.newClientV3()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer cli.Close()
|
||||
resp, err := cli.Status(ctx, m.AdvertiseClientURL)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return resp.Header.Revision, nil
|
||||
}
|
||||
|
||||
func (m *member) IsLeader() (bool, error) {
|
||||
cli, err := m.newClientV3()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
defer cli.Close()
|
||||
resp, err := cli.Status(context.Background(), m.AdvertiseClientURL)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return resp.Header.MemberId == resp.Leader, nil
|
||||
}
|
||||
|
||||
func (m *member) SetHealthKeyV3() error {
|
||||
cli, err := m.newClientV3()
|
||||
if err != nil {
|
||||
return fmt.Errorf("%v (%s)", err, m.AdvertiseClientURL)
|
||||
}
|
||||
defer cli.Close()
|
||||
// give a long enough timeout in case expensive requests (range/delete) are pending
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
_, err = cli.Put(ctx, "health", "good")
|
||||
cancel()
|
||||
if err != nil {
|
||||
return fmt.Errorf("%v (%s)", err, m.AdvertiseClientURL)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *member) newClientV3() (*clientv3.Client, error) {
|
||||
return clientv3.New(clientv3.Config{
|
||||
Endpoints: []string{m.AdvertiseClientURL},
|
||||
DialTimeout: 5 * time.Second,
|
||||
})
|
||||
}
|
||||
|
||||
func (m *member) dialGRPC() (*grpc.ClientConn, error) {
|
||||
return grpc.Dial(m.grpcAddr(), grpc.WithInsecure(), grpc.WithTimeout(5*time.Second), grpc.WithBlock())
|
||||
}
|
||||
|
||||
// grpcAddr gets the host from clientURL so it works with grpc.Dial()
|
||||
func (m *member) grpcAddr() string {
|
||||
u, err := url.Parse(m.AdvertiseClientURL)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return u.Host
|
||||
}
|
||||
|
||||
func (m *member) peerPort() (port int) {
|
||||
u, err := url.Parse(m.AdvertisePeerURL)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
_, portStr, err := net.SplitHostPort(u.Host)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if _, err = fmt.Sscanf(portStr, "%d", &port); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return port
|
||||
}
|
@ -1,64 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
var (
|
||||
caseTotalCounter = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: "etcd",
|
||||
Subsystem: "funcational_tester",
|
||||
Name: "case_total",
|
||||
Help: "Total number of finished test cases",
|
||||
},
|
||||
[]string{"desc"},
|
||||
)
|
||||
|
||||
caseFailedTotalCounter = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: "etcd",
|
||||
Subsystem: "funcational_tester",
|
||||
Name: "case_failed_total",
|
||||
Help: "Total number of failed test cases",
|
||||
},
|
||||
[]string{"desc"},
|
||||
)
|
||||
|
||||
roundTotalCounter = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: "etcd",
|
||||
Subsystem: "funcational_tester",
|
||||
Name: "round_total",
|
||||
Help: "Total number of finished test rounds.",
|
||||
})
|
||||
|
||||
roundFailedTotalCounter = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Namespace: "etcd",
|
||||
Subsystem: "funcational_tester",
|
||||
Name: "round_failed_total",
|
||||
Help: "Total number of failed test rounds.",
|
||||
})
|
||||
)
|
||||
|
||||
func init() {
|
||||
prometheus.MustRegister(caseTotalCounter)
|
||||
prometheus.MustRegister(caseFailedTotalCounter)
|
||||
prometheus.MustRegister(roundTotalCounter)
|
||||
prometheus.MustRegister(roundFailedTotalCounter)
|
||||
}
|
@ -1,57 +0,0 @@
|
||||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
type Status struct {
|
||||
Since time.Time
|
||||
Failures []string
|
||||
RoundLimit int
|
||||
|
||||
Cluster ClusterStatus
|
||||
cluster *cluster
|
||||
|
||||
mu sync.Mutex // guards Round and Case
|
||||
Round int
|
||||
Case int
|
||||
}
|
||||
|
||||
func (s *Status) setRound(r int) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.Round = r
|
||||
}
|
||||
|
||||
func (s *Status) getRound() int {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
return s.Round
|
||||
}
|
||||
|
||||
func (s *Status) setCase(c int) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.Case = c
|
||||
}
|
||||
|
||||
func (s *Status) getCase() int {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
return s.Case
|
||||
}
|
@ -1,218 +0,0 @@
|
||||
// Copyright 2015 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"golang.org/x/time/rate"
|
||||
)
|
||||
|
||||
type Stresser interface {
|
||||
// Stress starts to stress the etcd cluster
|
||||
Stress() error
|
||||
// Pause stops the stresser from sending requests to etcd. Resume by calling Stress.
|
||||
Pause()
|
||||
// Close releases all of the Stresser's resources.
|
||||
Close()
|
||||
// ModifiedKeys reports the number of keys created and deleted by stresser
|
||||
ModifiedKeys() int64
|
||||
// Checker returns an invariant checker for after the stresser is canceled.
|
||||
Checker() Checker
|
||||
}
|
||||
|
||||
// nopStresser implements Stresser that does nothing
|
||||
type nopStresser struct {
|
||||
start time.Time
|
||||
qps int
|
||||
}
|
||||
|
||||
func (s *nopStresser) Stress() error { return nil }
|
||||
func (s *nopStresser) Pause() {}
|
||||
func (s *nopStresser) Close() {}
|
||||
func (s *nopStresser) ModifiedKeys() int64 {
|
||||
return 0
|
||||
}
|
||||
func (s *nopStresser) Checker() Checker { return nil }
|
||||
|
||||
// compositeStresser implements a Stresser that runs a slice of
|
||||
// stressers concurrently.
|
||||
type compositeStresser struct {
|
||||
stressers []Stresser
|
||||
}
|
||||
|
||||
func (cs *compositeStresser) Stress() error {
|
||||
for i, s := range cs.stressers {
|
||||
if err := s.Stress(); err != nil {
|
||||
for j := 0; j < i; j++ {
|
||||
cs.stressers[j].Close() // close only the stressers that were already started
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cs *compositeStresser) Pause() {
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(cs.stressers))
|
||||
for i := range cs.stressers {
|
||||
go func(s Stresser) {
|
||||
defer wg.Done()
|
||||
s.Pause()
|
||||
}(cs.stressers[i])
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func (cs *compositeStresser) Close() {
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(cs.stressers))
|
||||
for i := range cs.stressers {
|
||||
go func(s Stresser) {
|
||||
defer wg.Done()
|
||||
s.Close()
|
||||
}(cs.stressers[i])
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func (cs *compositeStresser) ModifiedKeys() (modifiedKey int64) {
|
||||
for _, stress := range cs.stressers {
|
||||
modifiedKey += stress.ModifiedKeys()
|
||||
}
|
||||
return modifiedKey
|
||||
}
|
||||
|
||||
func (cs *compositeStresser) Checker() Checker {
|
||||
var chks []Checker
|
||||
for _, s := range cs.stressers {
|
||||
if chk := s.Checker(); chk != nil {
|
||||
chks = append(chks, chk)
|
||||
}
|
||||
}
|
||||
if len(chks) == 0 {
|
||||
return nil
|
||||
}
|
||||
return newCompositeChecker(chks)
|
||||
}
|
||||
|
||||
type stressConfig struct {
|
||||
keyLargeSize int
|
||||
keySize int
|
||||
keySuffixRange int
|
||||
keyTxnSuffixRange int
|
||||
keyTxnOps int
|
||||
|
||||
numLeases int
|
||||
keysPerLease int
|
||||
|
||||
rateLimiter *rate.Limiter
|
||||
|
||||
etcdRunnerPath string
|
||||
}
|
||||
|
||||
// NewStresser creates stresser from a comma separated list of stresser types.
|
||||
func NewStresser(s string, sc *stressConfig, m *member) Stresser {
|
||||
types := strings.Split(s, ",")
|
||||
if len(types) > 1 {
|
||||
stressers := make([]Stresser, len(types))
|
||||
for i, stype := range types {
|
||||
stressers[i] = NewStresser(stype, sc, m)
|
||||
}
|
||||
return &compositeStresser{stressers}
|
||||
}
|
||||
switch s {
|
||||
case "nop":
|
||||
return &nopStresser{start: time.Now(), qps: int(sc.rateLimiter.Limit())}
|
||||
case "keys":
|
||||
// TODO: Overly intensive stressers can make an etcd member panic with
|
||||
// an 'out of memory' error. Put rate limits on the server side.
|
||||
return &keyStresser{
|
||||
Endpoint: m.grpcAddr(),
|
||||
keyLargeSize: sc.keyLargeSize,
|
||||
keySize: sc.keySize,
|
||||
keySuffixRange: sc.keySuffixRange,
|
||||
keyTxnSuffixRange: sc.keyTxnSuffixRange,
|
||||
keyTxnOps: sc.keyTxnOps,
|
||||
N: 100,
|
||||
rateLimiter: sc.rateLimiter,
|
||||
}
|
||||
case "v2keys":
|
||||
return &v2Stresser{
|
||||
Endpoint: m.ClientURL,
|
||||
keySize: sc.keySize,
|
||||
keySuffixRange: sc.keySuffixRange,
|
||||
N: 100,
|
||||
rateLimiter: sc.rateLimiter,
|
||||
}
|
||||
case "lease":
|
||||
return &leaseStresser{
|
||||
endpoint: m.grpcAddr(),
|
||||
numLeases: sc.numLeases,
|
||||
keysPerLease: sc.keysPerLease,
|
||||
rateLimiter: sc.rateLimiter,
|
||||
}
|
||||
case "election-runner":
|
||||
reqRate := 100
|
||||
args := []string{
|
||||
"election",
|
||||
fmt.Sprintf("%v", time.Now().UnixNano()), // election name as current nano time
|
||||
"--dial-timeout=10s",
|
||||
"--endpoints", m.grpcAddr(),
|
||||
"--total-client-connections=10",
|
||||
"--rounds=0", // runs forever
|
||||
"--req-rate", fmt.Sprintf("%v", reqRate),
|
||||
}
|
||||
return newRunnerStresser(sc.etcdRunnerPath, args, sc.rateLimiter, reqRate)
|
||||
case "watch-runner":
|
||||
reqRate := 100
|
||||
args := []string{
|
||||
"watcher",
|
||||
"--prefix", fmt.Sprintf("%v", time.Now().UnixNano()), // prefix all keys with nano time
|
||||
"--total-keys=1",
|
||||
"--total-prefixes=1",
|
||||
"--watch-per-prefix=1",
|
||||
"--endpoints", m.grpcAddr(),
|
||||
"--rounds=0", // runs forever
|
||||
"--req-rate", fmt.Sprintf("%v", reqRate),
|
||||
}
|
||||
return newRunnerStresser(sc.etcdRunnerPath, args, sc.rateLimiter, reqRate)
|
||||
case "lock-racer-runner":
|
||||
reqRate := 100
|
||||
args := []string{
|
||||
"lock-racer",
|
||||
fmt.Sprintf("%v", time.Now().UnixNano()), // locker name as current nano time
|
||||
"--endpoints", m.grpcAddr(),
|
||||
"--total-client-connections=10",
|
||||
"--rounds=0", // runs forever
|
||||
"--req-rate", fmt.Sprintf("%v", reqRate),
|
||||
}
|
||||
return newRunnerStresser(sc.etcdRunnerPath, args, sc.rateLimiter, reqRate)
|
||||
case "lease-runner":
|
||||
args := []string{
|
||||
"lease-renewer",
|
||||
"--ttl=30",
|
||||
"--endpoints", m.grpcAddr(),
|
||||
}
|
||||
return newRunnerStresser(sc.etcdRunnerPath, args, sc.rateLimiter, 0)
|
||||
default:
|
||||
plog.Panicf("unknown stresser type: %s\n", s)
|
||||
}
|
||||
return nil // never reach here
|
||||
}
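
A comma-separated spec therefore fans out into one Stresser per type wrapped in a compositeStresser, and the tester builds one such composite per member (see resetStressCheck further down). A minimal illustration, assuming sc and m are an existing stressConfig and member:

s := NewStresser("keys,lease", sc, m) // compositeStresser{keyStresser, leaseStresser}
if err := s.Stress(); err != nil {
	plog.Errorf("failed to start stressers (%v)", err)
}
defer s.Close()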
|
@ -1,286 +0,0 @@
|
||||
// Copyright 2015 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"time"
|
||||
)
|
||||
|
||||
type tester struct {
|
||||
cluster *cluster
|
||||
limit int
|
||||
exitOnFailure bool
|
||||
|
||||
failures []failure
|
||||
status Status
|
||||
currentRevision int64
|
||||
|
||||
stresserType string
|
||||
scfg stressConfig
|
||||
doChecks bool
|
||||
|
||||
stresser Stresser
|
||||
checker Checker
|
||||
}
|
||||
|
||||
// compactQPS is a rough number of entries compacted per second.
|
||||
// Previous tests showed etcd can compact about 60,000 entries per second.
|
||||
const compactQPS = 50000
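
As a worked example of how runLoop below uses this constant: a round that modified 1,000,000 keys adds 1,000,000/50,000 = 20 seconds to the base 10-second compaction timeout, i.e. 30 seconds in total.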
|
||||
|
||||
func (tt *tester) runLoop() {
|
||||
tt.status.Since = time.Now()
|
||||
tt.status.RoundLimit = tt.limit
|
||||
tt.status.cluster = tt.cluster
|
||||
for _, f := range tt.failures {
|
||||
tt.status.Failures = append(tt.status.Failures, f.Desc())
|
||||
}
|
||||
|
||||
if err := tt.resetStressCheck(); err != nil {
|
||||
plog.Errorf("%s failed to start stresser (%v)", tt.logPrefix(), err)
|
||||
tt.failed()
|
||||
return
|
||||
}
|
||||
|
||||
var preModifiedKey int64
|
||||
for round := 0; round < tt.limit || tt.limit == -1; round++ {
|
||||
tt.status.setRound(round)
|
||||
roundTotalCounter.Inc()
|
||||
|
||||
if err := tt.doRound(round); err != nil {
|
||||
plog.Warningf("%s functional-tester returning with error (%v)", tt.logPrefix(), err)
|
||||
if tt.cleanup() != nil {
|
||||
return
|
||||
}
|
||||
// reset preModifiedKey after clean up
|
||||
preModifiedKey = 0
|
||||
continue
|
||||
}
|
||||
// -1 so that logPrefix doesn't print out 'case'
|
||||
tt.status.setCase(-1)
|
||||
|
||||
revToCompact := max(0, tt.currentRevision-10000)
|
||||
currentModifiedKey := tt.stresser.ModifiedKeys()
|
||||
modifiedKey := currentModifiedKey - preModifiedKey
|
||||
preModifiedKey = currentModifiedKey
|
||||
timeout := 10 * time.Second
|
||||
timeout += time.Duration(modifiedKey/compactQPS) * time.Second
|
||||
plog.Infof("%s compacting %d modifications (timeout %v)", tt.logPrefix(), modifiedKey, timeout)
|
||||
if err := tt.compact(revToCompact, timeout); err != nil {
|
||||
plog.Warningf("%s functional-tester compact got error (%v)", tt.logPrefix(), err)
|
||||
if tt.cleanup() != nil {
|
||||
return
|
||||
}
|
||||
// reset preModifiedKey after clean up
|
||||
preModifiedKey = 0
|
||||
}
|
||||
if round > 0 && round%500 == 0 { // every 500 rounds
|
||||
if err := tt.defrag(); err != nil {
|
||||
plog.Warningf("%s functional-tester returning with error (%v)", tt.logPrefix(), err)
|
||||
tt.failed()
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
plog.Infof("%s functional-tester is finished", tt.logPrefix())
|
||||
}
|
||||
|
||||
func (tt *tester) doRound(round int) error {
|
||||
for j, f := range tt.failures {
|
||||
caseTotalCounter.WithLabelValues(f.Desc()).Inc()
|
||||
tt.status.setCase(j)
|
||||
|
||||
if err := tt.cluster.WaitHealth(); err != nil {
|
||||
return fmt.Errorf("wait full health error: %v", err)
|
||||
}
|
||||
plog.Infof("%s injecting failure %q", tt.logPrefix(), f.Desc())
|
||||
if err := f.Inject(tt.cluster, round); err != nil {
|
||||
return fmt.Errorf("injection error: %v", err)
|
||||
}
|
||||
plog.Infof("%s injected failure", tt.logPrefix())
|
||||
|
||||
plog.Infof("%s recovering failure %q", tt.logPrefix(), f.Desc())
|
||||
if err := f.Recover(tt.cluster, round); err != nil {
|
||||
return fmt.Errorf("recovery error: %v", err)
|
||||
}
|
||||
plog.Infof("%s recovered failure", tt.logPrefix())
|
||||
tt.pauseStresser()
|
||||
plog.Infof("%s wait until cluster is healthy", tt.logPrefix())
|
||||
if err := tt.cluster.WaitHealth(); err != nil {
|
||||
return fmt.Errorf("wait full health error: %v", err)
|
||||
}
|
||||
plog.Infof("%s cluster is healthy", tt.logPrefix())
|
||||
|
||||
plog.Infof("%s checking consistency and invariant of cluster", tt.logPrefix())
|
||||
if err := tt.checkConsistency(); err != nil {
|
||||
return fmt.Errorf("tt.checkConsistency error (%v)", err)
|
||||
}
|
||||
plog.Infof("%s checking consistency and invariant of cluster done", tt.logPrefix())
|
||||
|
||||
plog.Infof("%s succeed!", tt.logPrefix())
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (tt *tester) updateRevision() error {
|
||||
revs, _, err := tt.cluster.getRevisionHash()
|
||||
for _, rev := range revs {
|
||||
tt.currentRevision = rev
|
||||
break // just need to get one of the current revisions
|
||||
}
|
||||
|
||||
plog.Infof("%s updated current revision to %d", tt.logPrefix(), tt.currentRevision)
|
||||
return err
|
||||
}
|
||||
|
||||
func (tt *tester) checkConsistency() (err error) {
|
||||
defer func() {
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if err = tt.updateRevision(); err != nil {
|
||||
plog.Warningf("%s functional-tester returning with tt.updateRevision error (%v)", tt.logPrefix(), err)
|
||||
return
|
||||
}
|
||||
err = tt.startStresser()
|
||||
}()
|
||||
if err = tt.checker.Check(); err != nil {
|
||||
plog.Infof("%s %v", tt.logPrefix(), err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (tt *tester) compact(rev int64, timeout time.Duration) (err error) {
|
||||
tt.pauseStresser()
|
||||
defer func() {
|
||||
if err == nil {
|
||||
err = tt.startStresser()
|
||||
}
|
||||
}()
|
||||
|
||||
plog.Infof("%s compacting storage (current revision %d, compact revision %d)", tt.logPrefix(), tt.currentRevision, rev)
|
||||
if err = tt.cluster.compactKV(rev, timeout); err != nil {
|
||||
return err
|
||||
}
|
||||
plog.Infof("%s compacted storage (compact revision %d)", tt.logPrefix(), rev)
|
||||
|
||||
plog.Infof("%s checking compaction (compact revision %d)", tt.logPrefix(), rev)
|
||||
if err = tt.cluster.checkCompact(rev); err != nil {
|
||||
plog.Warningf("%s checkCompact error (%v)", tt.logPrefix(), err)
|
||||
return err
|
||||
}
|
||||
|
||||
plog.Infof("%s confirmed compaction (compact revision %d)", tt.logPrefix(), rev)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (tt *tester) defrag() error {
|
||||
plog.Infof("%s defragmenting...", tt.logPrefix())
|
||||
if err := tt.cluster.defrag(); err != nil {
|
||||
plog.Warningf("%s defrag error (%v)", tt.logPrefix(), err)
|
||||
if cerr := tt.cleanup(); cerr != nil {
|
||||
return fmt.Errorf("%s, %s", err, cerr)
|
||||
}
|
||||
return err
|
||||
}
|
||||
plog.Infof("%s defragmented...", tt.logPrefix())
|
||||
return nil
|
||||
}
|
||||
|
||||
func (tt *tester) logPrefix() string {
|
||||
var (
|
||||
rd = tt.status.getRound()
|
||||
cs = tt.status.getCase()
|
||||
prefix = fmt.Sprintf("[round#%d case#%d]", rd, cs)
|
||||
)
|
||||
if cs == -1 {
|
||||
prefix = fmt.Sprintf("[round#%d]", rd)
|
||||
}
|
||||
return prefix
|
||||
}
|
||||
|
||||
func (tt *tester) failed() {
|
||||
if !tt.exitOnFailure {
|
||||
return
|
||||
}
|
||||
plog.Warningf("%s exiting on failure", tt.logPrefix())
|
||||
tt.cluster.Terminate()
|
||||
os.Exit(2)
|
||||
}
|
||||
|
||||
func (tt *tester) cleanup() error {
|
||||
defer tt.failed()
|
||||
|
||||
roundFailedTotalCounter.Inc()
|
||||
desc := "compact/defrag"
|
||||
if tt.status.Case != -1 {
|
||||
desc = tt.failures[tt.status.Case].Desc()
|
||||
}
|
||||
caseFailedTotalCounter.WithLabelValues(desc).Inc()
|
||||
|
||||
tt.closeStresser()
|
||||
if err := tt.cluster.Cleanup(); err != nil {
|
||||
plog.Warningf("%s cleanup error: %v", tt.logPrefix(), err)
|
||||
return err
|
||||
}
|
||||
if err := tt.cluster.Reset(); err != nil {
|
||||
plog.Warningf("%s cleanup Bootstrap error: %v", tt.logPrefix(), err)
|
||||
return err
|
||||
}
|
||||
return tt.resetStressCheck()
|
||||
}
|
||||
|
||||
func (tt *tester) pauseStresser() {
|
||||
plog.Infof("%s pausing the stressers...", tt.logPrefix())
|
||||
tt.stresser.Pause()
|
||||
plog.Infof("%s paused stressers", tt.logPrefix())
|
||||
}
|
||||
|
||||
func (tt *tester) startStresser() (err error) {
|
||||
plog.Infof("%s starting the stressers...", tt.logPrefix())
|
||||
err = tt.stresser.Stress()
|
||||
plog.Infof("%s started stressers", tt.logPrefix())
|
||||
return err
|
||||
}
|
||||
|
||||
func (tt *tester) closeStresser() {
|
||||
plog.Infof("%s closing the stressers...", tt.logPrefix())
|
||||
tt.stresser.Close()
|
||||
plog.Infof("%s closed stressers", tt.logPrefix())
|
||||
}
|
||||
|
||||
func (tt *tester) resetStressCheck() error {
|
||||
plog.Infof("%s resetting stressers and checkers...", tt.logPrefix())
|
||||
cs := &compositeStresser{}
|
||||
for _, m := range tt.cluster.Members {
|
||||
s := NewStresser(tt.stresserType, &tt.scfg, m)
|
||||
cs.stressers = append(cs.stressers, s)
|
||||
}
|
||||
tt.stresser = cs
|
||||
if !tt.doChecks {
|
||||
tt.checker = newNoChecker()
|
||||
return tt.startStresser()
|
||||
}
|
||||
chk := newHashChecker(hashAndRevGetter(tt.cluster))
|
||||
if schk := cs.Checker(); schk != nil {
|
||||
chk = newCompositeChecker([]Checker{chk, schk})
|
||||
}
|
||||
tt.checker = chk
|
||||
return tt.startStresser()
|
||||
}
|
||||
|
||||
func (tt *tester) Report() int64 { return tt.stresser.ModifiedKeys() }
|
@ -1,51 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
"fmt"
"strings"
)

func getSameValue(vals map[string]int64) bool {
var rv int64
for _, v := range vals {
if rv == 0 {
rv = v
}
if rv != v {
return false
}
}
return true
}

func max(n1, n2 int64) int64 {
if n1 > n2 {
return n1
}
return n2
}

func errsToError(errs []error) error {
if len(errs) == 0 {
return nil
}
stringArr := make([]string, len(errs))
for i, err := range errs {
stringArr[i] = err.Error()
}
return fmt.Errorf(strings.Join(stringArr, ", "))
}
@ -1,117 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
"context"
"fmt"
"math/rand"
"net"
"net/http"
"sync"
"sync/atomic"
"time"

"golang.org/x/time/rate"

clientV2 "github.com/coreos/etcd/client"
)

type v2Stresser struct {
Endpoint string

keySize int
keySuffixRange int

N int

rateLimiter *rate.Limiter

wg sync.WaitGroup

atomicModifiedKey int64

cancel func()
}

func (s *v2Stresser) Stress() error {
cfg := clientV2.Config{
Endpoints: []string{s.Endpoint},
Transport: &http.Transport{
Dial: (&net.Dialer{
Timeout: time.Second,
KeepAlive: 30 * time.Second,
}).Dial,
MaxIdleConnsPerHost: s.N,
},
}
c, err := clientV2.New(cfg)
if err != nil {
return err
}

kv := clientV2.NewKeysAPI(c)
ctx, cancel := context.WithCancel(context.Background())
s.cancel = cancel
s.wg.Add(s.N)
for i := 0; i < s.N; i++ {
go func() {
defer s.wg.Done()
s.run(ctx, kv)
}()
}
return nil
}

func (s *v2Stresser) run(ctx context.Context, kv clientV2.KeysAPI) {
for {
if err := s.rateLimiter.Wait(ctx); err == context.Canceled {
return
}
setctx, setcancel := context.WithTimeout(ctx, clientV2.DefaultRequestTimeout)
key := fmt.Sprintf("foo%016x", rand.Intn(s.keySuffixRange))
_, err := kv.Set(setctx, key, string(randBytes(s.keySize)), nil)
if err == nil {
atomic.AddInt64(&s.atomicModifiedKey, 1)
}
setcancel()
if err == context.Canceled {
return
}
}
}

func (s *v2Stresser) Pause() {
s.cancel()
s.wg.Wait()
}

func (s *v2Stresser) Close() {
s.Pause()
}

func (s *v2Stresser) ModifiedKeys() int64 {
return atomic.LoadInt64(&s.atomicModifiedKey)
}

func (s *v2Stresser) Checker() Checker { return nil }

func randBytes(size int) []byte {
data := make([]byte, size)
for i := 0; i < size; i++ {
data[i] = byte(int('a') + rand.Intn(26))
}
return data
}
@ -1,37 +0,0 @@
#!/usr/bin/env bash

<<COMMENT
# to run agent
./scripts/agent-1.sh

# to run with failpoints
ETCD_EXEC_PATH=/etcd-failpoints ./scripts/agent-1.sh
COMMENT

if ! [[ "$0" =~ "scripts/agent-1.sh" ]]; then
echo "must be run from tools/functional-tester"
exit 255
fi

if [ -z "${ETCD_EXEC_PATH}" ]; then
ETCD_EXEC_PATH=/etcd
echo "Running agent without failpoints:" ${ETCD_EXEC_PATH}
elif [[ "${ETCD_EXEC_PATH}" == "/etcd-failpoints" ]]; then
echo "Running agent with failpoints:" ${ETCD_EXEC_PATH}
else
echo "Cannot find executable:" ${ETCD_EXEC_PATH}
exit 255
fi

rm -rf `pwd`/agent-1 && mkdir -p `pwd`/agent-1
docker run \
--rm \
--net=host \
--name agent-1 \
--mount type=bind,source=`pwd`/agent-1,destination=/agent-1 \
gcr.io/etcd-development/etcd-functional-tester:go1.9.3 \
/bin/bash -c "/etcd-agent \
--etcd-path ${ETCD_EXEC_PATH} \
--etcd-log-dir /agent-1 \
--port :19027 \
--failpoint-addr :7381"
@ -1,37 +0,0 @@
#!/usr/bin/env bash

<<COMMENT
# to run agent
./scripts/agent-2.sh

# to run with failpoints
ETCD_EXEC_PATH=/etcd-failpoints ./scripts/agent-2.sh
COMMENT

if ! [[ "$0" =~ "scripts/agent-2.sh" ]]; then
echo "must be run from tools/functional-tester"
exit 255
fi

if [ -z "${ETCD_EXEC_PATH}" ]; then
ETCD_EXEC_PATH=/etcd
echo "Running agent without failpoints:" ${ETCD_EXEC_PATH}
elif [[ "${ETCD_EXEC_PATH}" == "/etcd-failpoints" ]]; then
echo "Running agent with failpoints:" ${ETCD_EXEC_PATH}
else
echo "Cannot find executable:" ${ETCD_EXEC_PATH}
exit 255
fi

rm -rf `pwd`/agent-2 && mkdir -p `pwd`/agent-2
docker run \
--rm \
--net=host \
--name agent-2 \
--mount type=bind,source=`pwd`/agent-2,destination=/agent-2 \
gcr.io/etcd-development/etcd-functional-tester:go1.9.3 \
/bin/bash -c "/etcd-agent \
--etcd-path ${ETCD_EXEC_PATH} \
--etcd-log-dir /agent-2 \
--port :29027 \
--failpoint-addr :7382"
@ -1,37 +0,0 @@
#!/usr/bin/env bash

<<COMMENT
# to run agent
./scripts/agent-3.sh

# to run with failpoints
ETCD_EXEC_PATH=/etcd-failpoints ./scripts/agent-3.sh
COMMENT

if ! [[ "$0" =~ "scripts/agent-3.sh" ]]; then
echo "must be run from tools/functional-tester"
exit 255
fi

if [ -z "${ETCD_EXEC_PATH}" ]; then
ETCD_EXEC_PATH=/etcd
echo "Running agent without failpoints:" ${ETCD_EXEC_PATH}
elif [[ "${ETCD_EXEC_PATH}" == "/etcd-failpoints" ]]; then
echo "Running agent with failpoints:" ${ETCD_EXEC_PATH}
else
echo "Cannot find executable:" ${ETCD_EXEC_PATH}
exit 255
fi

rm -rf `pwd`/agent-3 && mkdir -p `pwd`/agent-3
docker run \
--rm \
--net=host \
--name agent-3 \
--mount type=bind,source=`pwd`/agent-3,destination=/agent-3 \
gcr.io/etcd-development/etcd-functional-tester:go1.9.3 \
/bin/bash -c "/etcd-agent \
--etcd-path ${ETCD_EXEC_PATH} \
--etcd-log-dir /agent-3 \
--port :39027 \
--failpoint-addr :7383"
@ -1,24 +0,0 @@
#!/usr/bin/env bash

if ! [[ "$0" =~ "scripts/tester-limit.sh" ]]; then
echo "must be run from tools/functional-tester"
exit 255
fi

# to run only 1 test round
docker run \
--rm \
--net=host \
--name tester \
gcr.io/etcd-development/etcd-functional-tester:go1.9.3 \
/bin/bash -c "/etcd-tester \
--agent-endpoints '127.0.0.1:19027,127.0.0.1:29027,127.0.0.1:39027' \
--client-ports 1379,2379,3379 \
--advertise-client-ports 13790,23790,33790 \
--peer-ports 1380,2380,3380 \
--advertise-peer-ports 13800,23800,33800 \
--limit 1 \
--stress-qps=2500 \
--stress-key-txn-count 100 \
--stress-key-txn-ops 10 \
--exit-on-failure"
@ -1,25 +0,0 @@
#!/usr/bin/env bash

if ! [[ "$0" =~ "scripts/tester-runner.sh" ]]; then
echo "must be run from tools/functional-tester"
exit 255
fi

# to run with etcd-runner
docker run \
--rm \
--net=host \
--name tester \
gcr.io/etcd-development/etcd-functional-tester:go1.9.3 \
/bin/bash -c "/etcd-tester \
--agent-endpoints '127.0.0.1:19027,127.0.0.1:29027,127.0.0.1:39027' \
--client-ports 1379,2379,3379 \
--advertise-client-ports 13790,23790,33790 \
--peer-ports 1380,2380,3380 \
--advertise-peer-ports 13800,23800,33800 \
--stress-qps=2500 \
--stress-key-txn-count 100 \
--stress-key-txn-ops 10 \
--etcd-runner /etcd-runner \
--stresser=keys,lease,election-runner,watch-runner,lock-racer-runner,lease-runner \
--exit-on-failure"
@ -1,22 +0,0 @@
#!/usr/bin/env bash

if ! [[ "$0" =~ "scripts/tester.sh" ]]; then
echo "must be run from tools/functional-tester"
exit 255
fi

docker run \
--rm \
--net=host \
--name tester \
gcr.io/etcd-development/etcd-functional-tester:go1.9.3 \
/bin/bash -c "/etcd-tester \
--agent-endpoints '127.0.0.1:19027,127.0.0.1:29027,127.0.0.1:39027' \
--client-ports 1379,2379,3379 \
--advertise-client-ports 13790,23790,33790 \
--peer-ports 1380,2380,3380 \
--advertise-peer-ports 13800,23800,33800 \
--stress-qps=2500 \
--stress-key-txn-count 100 \
--stress-key-txn-ops 10 \
--exit-on-failure"
@ -1,21 +0,0 @@
# Use goreman to run `go get github.com/mattn/goreman`

# peer bridges
pbridge1: tools/local-tester/bridge.sh 127.0.0.1:11111 127.0.0.1:12380
pbridge2: tools/local-tester/bridge.sh 127.0.0.1:22222 127.0.0.1:22380
pbridge3: tools/local-tester/bridge.sh 127.0.0.1:33333 127.0.0.1:32380

# client bridges
cbridge1: tools/local-tester/bridge.sh 127.0.0.1:2379 127.0.0.1:11119
cbridge2: tools/local-tester/bridge.sh 127.0.0.1:22379 127.0.0.1:22229
cbridge3: tools/local-tester/bridge.sh 127.0.0.1:32379 127.0.0.1:33339

faults: tools/local-tester/faults.sh

stress-put: tools/benchmark/benchmark --endpoints=127.0.0.1:2379,127.0.0.1:22379,127.0.0.1:32379 --clients=27 --conns=3 put --sequential-keys --key-space-size=100000 --total=100000

etcd1: GOFAIL_HTTP="127.0.0.1:11180" bin/etcd --name infra1 --snapshot-count=1000 --listen-client-urls http://127.0.0.1:11119 --advertise-client-urls http://127.0.0.1:2379 --listen-peer-urls http://127.0.0.1:12380 --initial-advertise-peer-urls http://127.0.0.1:11111 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:11111,infra2=http://127.0.0.1:22222,infra3=http://127.0.0.1:33333' --initial-cluster-state new --enable-pprof
etcd2: GOFAIL_HTTP="127.0.0.1:22280" bin/etcd --name infra2 --snapshot-count=1000 --listen-client-urls http://127.0.0.1:22229 --advertise-client-urls http://127.0.0.1:22379 --listen-peer-urls http://127.0.0.1:22380 --initial-advertise-peer-urls http://127.0.0.1:22222 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:11111,infra2=http://127.0.0.1:22222,infra3=http://127.0.0.1:33333' --initial-cluster-state new --enable-pprof
etcd3: GOFAIL_HTTP="127.0.0.1:33380" bin/etcd --name infra3 --snapshot-count=1000 --listen-client-urls http://127.0.0.1:33339 --advertise-client-urls http://127.0.0.1:32379 --listen-peer-urls http://127.0.0.1:32380 --initial-advertise-peer-urls http://127.0.0.1:33333 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:11111,infra2=http://127.0.0.1:22222,infra3=http://127.0.0.1:33333' --initial-cluster-state new --enable-pprof
# in future, use proxy to listen on 2379
#proxy: bin/etcd --name infra-proxy1 --proxy=on --listen-client-urls http://127.0.0.1:2378 --initial-cluster 'infra1=http://127.0.0.1:12380,infra2=http://127.0.0.1:22380,infra3=http://127.0.0.1:32380' --enable-pprof
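In this Procfile every member's client and peer traffic is routed through a `bridge.sh` process, so all of it can be delayed, corrupted, or dropped. A hypothetical smoke test while the Procfile is running (not part of local-tester itself; it assumes `etcdctl` is on the `PATH`) is to issue plain requests through one of the client bridges, e.g. `cbridge1`, which forwards 127.0.0.1:2379 to etcd1's listen address. Because the bridge injects faults by design, these requests may occasionally hang or fail:

```sh
# write and read through cbridge1 (127.0.0.1:2379 -> 127.0.0.1:11119)
ETCDCTL_API=3 etcdctl --endpoints=127.0.0.1:2379 put smoke-test-key hello
ETCDCTL_API=3 etcdctl --endpoints=127.0.0.1:2379 get smoke-test-key
```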
@ -1,25 +0,0 @@
# etcd local-tester

The etcd local-tester runs a fault-injected cluster using local processes. It sets up an etcd cluster with unreliable network bridges on its peer and client interfaces. The cluster runs with a constant stream of `Put` requests to simulate client usage. A fault injection script periodically kills cluster members and disrupts bridge connectivity.

# Requirements

local-tester depends on `goreman` to manage its processes and `bash` to run fault injection.

# Building

local-tester needs `etcd`, `benchmark`, and `bridge` binaries. To build these binaries, run the following from the etcd repository root:

```sh
./build
pushd tools/benchmark/ && go build && popd
pushd tools/local-tester/bridge && go build && popd
```

# Running

The fault-injected cluster is invoked with `goreman`:

```sh
goreman -f tools/local-tester/Procfile start
```
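Individual processes can also be stopped and restarted by hand while the cluster is running. This is a small sketch, not part of the README above, using the same `goreman run` subcommands that the `faults.sh` script later in this diff relies on:

```sh
# manually kill and later revive a single member managed by the Procfile
goreman -f tools/local-tester/Procfile run stop etcd1
goreman -f tools/local-tester/Procfile run restart etcd1
```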
@ -1,16 +0,0 @@
#!/bin/sh

exec tools/local-tester/bridge/bridge \
-delay-accept \
-reset-listen \
-conn-fault-rate=0.25 \
-immediate-close \
-blackhole \
-time-close \
-write-remote-only \
-read-remote-only \
-random-blackhole \
-corrupt-receive \
-corrupt-send \
-reorder \
$@
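`bridge.sh` enables the whole fault set at once. The bridge binary can also be invoked directly with a narrower set of flags; the following is a hypothetical one-off invocation (assuming the binary was built as in the README above), using flags defined in the `main.go` shown next:

```sh
# Bridge a local client port to an etcd member with only a transmit delay fault.
# With -conn-fault-rate=1.0 every accepted connection picks a random entry from
# the fault set, so connections alternate between the plain bridge path and the
# 50ms tx-delay path.
tools/local-tester/bridge/bridge \
  -conn-fault-rate=1.0 \
  -tx-delay=50ms \
  127.0.0.1:2379 127.0.0.1:11119
```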
@ -1,320 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package main is the entry point for the local tester network bridge.
package main

import (
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"math/rand"
"net"
"sync"
"time"
)

type bridgeConn struct {
in net.Conn
out net.Conn
d dispatcher
}

func newBridgeConn(in net.Conn, d dispatcher) (*bridgeConn, error) {
out, err := net.Dial("tcp", flag.Args()[1])
if err != nil {
in.Close()
return nil, err
}
return &bridgeConn{in, out, d}, nil
}

func (b *bridgeConn) String() string {
return fmt.Sprintf("%v <-> %v", b.in.RemoteAddr(), b.out.RemoteAddr())
}

func (b *bridgeConn) Close() {
b.in.Close()
b.out.Close()
}

func bridge(b *bridgeConn) {
log.Println("bridging", b.String())
go b.d.Copy(b.out, makeFetch(b.in))
b.d.Copy(b.in, makeFetch(b.out))
}

func delayBridge(b *bridgeConn, txDelay, rxDelay time.Duration) {
go b.d.Copy(b.out, makeFetchDelay(makeFetch(b.in), txDelay))
b.d.Copy(b.in, makeFetchDelay(makeFetch(b.out), rxDelay))
}

func timeBridge(b *bridgeConn) {
go func() {
t := time.Duration(rand.Intn(5)+1) * time.Second
time.Sleep(t)
log.Printf("killing connection %s after %v\n", b.String(), t)
b.Close()
}()
bridge(b)
}

func blackhole(b *bridgeConn) {
log.Println("blackholing connection", b.String())
io.Copy(ioutil.Discard, b.in)
b.Close()
}

func readRemoteOnly(b *bridgeConn) {
log.Println("one way (<-)", b.String())
b.d.Copy(b.in, makeFetch(b.out))
}

func writeRemoteOnly(b *bridgeConn) {
log.Println("one way (->)", b.String())
b.d.Copy(b.out, makeFetch(b.in))
}

func corruptReceive(b *bridgeConn) {
log.Println("corruptReceive", b.String())
go b.d.Copy(b.in, makeFetchCorrupt(makeFetch(b.out)))
b.d.Copy(b.out, makeFetch(b.in))
}

func corruptSend(b *bridgeConn) {
log.Println("corruptSend", b.String())
go b.d.Copy(b.out, makeFetchCorrupt(makeFetch(b.in)))
b.d.Copy(b.in, makeFetch(b.out))
}

func makeFetch(c io.Reader) fetchFunc {
return func() ([]byte, error) {
b := make([]byte, 4096)
n, err := c.Read(b)
if err != nil {
return nil, err
}
return b[:n], nil
}
}

func makeFetchCorrupt(f func() ([]byte, error)) fetchFunc {
return func() ([]byte, error) {
b, err := f()
if err != nil {
return nil, err
}
// corrupt one byte approximately every 16K
for i := 0; i < len(b); i++ {
if rand.Intn(16*1024) == 0 {
b[i] = b[i] + 1
}
}
return b, nil
}
}

func makeFetchRand(f func() ([]byte, error)) fetchFunc {
return func() ([]byte, error) {
if rand.Intn(10) == 0 {
return nil, fmt.Errorf("fetchRand: done")
}
b, err := f()
if err != nil {
return nil, err
}
return b, nil
}
}

func makeFetchDelay(f fetchFunc, delay time.Duration) fetchFunc {
return func() ([]byte, error) {
b, err := f()
if err != nil {
return nil, err
}
time.Sleep(delay)
return b, nil
}
}

func randomBlackhole(b *bridgeConn) {
log.Println("random blackhole: connection", b.String())

var wg sync.WaitGroup
wg.Add(2)
go func() {
b.d.Copy(b.in, makeFetchRand(makeFetch(b.out)))
wg.Done()
}()
go func() {
b.d.Copy(b.out, makeFetchRand(makeFetch(b.in)))
wg.Done()
}()
wg.Wait()
b.Close()
}

type config struct {
delayAccept bool
resetListen bool

connFaultRate float64
immediateClose bool
blackhole bool
timeClose bool
writeRemoteOnly bool
readRemoteOnly bool
randomBlackhole bool
corruptSend bool
corruptReceive bool
reorder bool

txDelay string
rxDelay string
}

type acceptFaultFunc func()
type connFaultFunc func(*bridgeConn)

func main() {
var cfg config

flag.BoolVar(&cfg.delayAccept, "delay-accept", false, "delays accepting new connections")
flag.BoolVar(&cfg.resetListen, "reset-listen", false, "resets the listening port")

flag.Float64Var(&cfg.connFaultRate, "conn-fault-rate", 0.0, "rate of faulty connections")
flag.BoolVar(&cfg.immediateClose, "immediate-close", false, "close after accept")
flag.BoolVar(&cfg.blackhole, "blackhole", false, "reads nothing, writes go nowhere")
flag.BoolVar(&cfg.timeClose, "time-close", false, "close after random time")
flag.BoolVar(&cfg.writeRemoteOnly, "write-remote-only", false, "only write, no read")
flag.BoolVar(&cfg.readRemoteOnly, "read-remote-only", false, "only read, no write")
flag.BoolVar(&cfg.randomBlackhole, "random-blackhole", false, "blackhole after data xfer")
flag.BoolVar(&cfg.corruptReceive, "corrupt-receive", false, "corrupt packets received from destination")
flag.BoolVar(&cfg.corruptSend, "corrupt-send", false, "corrupt packets sent to destination")
flag.BoolVar(&cfg.reorder, "reorder", false, "reorder packet delivery")

flag.StringVar(&cfg.txDelay, "tx-delay", "0", "duration to delay client transmission to server")
flag.StringVar(&cfg.rxDelay, "rx-delay", "0", "duration to delay client receive from server")

flag.Parse()

lAddr := flag.Args()[0]
fwdAddr := flag.Args()[1]
log.Println("listening on ", lAddr)
log.Println("forwarding to ", fwdAddr)
l, err := net.Listen("tcp", lAddr)
if err != nil {
log.Fatal(err)
}
defer l.Close()

acceptFaults := []acceptFaultFunc{func() {}}
if cfg.delayAccept {
f := func() {
log.Println("delaying accept")
time.Sleep(3 * time.Second)
}
acceptFaults = append(acceptFaults, f)
}
if cfg.resetListen {
f := func() {
log.Println("reset listen port")
l.Close()
newListener, err := net.Listen("tcp", lAddr)
if err != nil {
log.Fatal(err)
}
l = newListener

}
acceptFaults = append(acceptFaults, f)
}

connFaults := []connFaultFunc{func(b *bridgeConn) { bridge(b) }}
if cfg.immediateClose {
f := func(b *bridgeConn) {
log.Printf("terminating connection %s immediately", b.String())
b.Close()
}
connFaults = append(connFaults, f)
}
if cfg.blackhole {
connFaults = append(connFaults, blackhole)
}
if cfg.timeClose {
connFaults = append(connFaults, timeBridge)
}
if cfg.writeRemoteOnly {
connFaults = append(connFaults, writeRemoteOnly)
}
if cfg.readRemoteOnly {
connFaults = append(connFaults, readRemoteOnly)
}
if cfg.randomBlackhole {
connFaults = append(connFaults, randomBlackhole)
}
if cfg.corruptSend {
connFaults = append(connFaults, corruptSend)
}
if cfg.corruptReceive {
connFaults = append(connFaults, corruptReceive)
}

txd, txdErr := time.ParseDuration(cfg.txDelay)
if txdErr != nil {
log.Fatal(txdErr)
}
rxd, rxdErr := time.ParseDuration(cfg.rxDelay)
if rxdErr != nil {
log.Fatal(rxdErr)
}
if txd != 0 || rxd != 0 {
f := func(b *bridgeConn) { delayBridge(b, txd, rxd) }
connFaults = append(connFaults, f)
}

if len(connFaults) > 1 && cfg.connFaultRate == 0 {
log.Fatal("connection faults defined but conn-fault-rate=0")
}

var disp dispatcher
if cfg.reorder {
disp = newDispatcherPool()
} else {
disp = newDispatcherImmediate()
}

for {
acceptFaults[rand.Intn(len(acceptFaults))]()
conn, err := l.Accept()
if err != nil {
log.Fatal(err)
}

r := rand.Intn(len(connFaults))
if rand.Intn(100) >= int(100.0*cfg.connFaultRate) {
r = 0
}

bc, err := newBridgeConn(conn, disp)
if err != nil {
log.Printf("oops %v", err)
continue
}
go connFaults[r](bc)
}
}
@ -1,140 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
"io"
"math/rand"
"sync"
"time"
)

var (
// dispatchPoolDelay is the time to wait before flushing all buffered packets
dispatchPoolDelay = 100 * time.Millisecond
// dispatchPacketBytes is how many bytes to send until choosing a new connection
dispatchPacketBytes = 32
)

type dispatcher interface {
// Copy works like io.Copy using buffers provided by fetchFunc
Copy(io.Writer, fetchFunc) error
}

type fetchFunc func() ([]byte, error)

type dispatcherPool struct {
// mu protects the dispatch packet queue 'q'
mu sync.Mutex
q []dispatchPacket
}

type dispatchPacket struct {
buf []byte
out io.Writer
}

func newDispatcherPool() dispatcher {
d := &dispatcherPool{}
go d.writeLoop()
return d
}

func (d *dispatcherPool) writeLoop() {
for {
time.Sleep(dispatchPoolDelay)
d.flush()
}
}

func (d *dispatcherPool) flush() {
d.mu.Lock()
pkts := d.q
d.q = nil
d.mu.Unlock()
if len(pkts) == 0 {
return
}

// sort by sockets; preserve the packet ordering within a socket
pktmap := make(map[io.Writer][]dispatchPacket)
outs := []io.Writer{}
for _, pkt := range pkts {
opkts, ok := pktmap[pkt.out]
if !ok {
outs = append(outs, pkt.out)
}
pktmap[pkt.out] = append(opkts, pkt)
}

// send all packets in pkts
for len(outs) != 0 {
// randomize writer on every write
r := rand.Intn(len(outs))
rpkts := pktmap[outs[r]]
rpkts[0].out.Write(rpkts[0].buf)
// dequeue packet
rpkts = rpkts[1:]
if len(rpkts) == 0 {
delete(pktmap, outs[r])
outs = append(outs[:r], outs[r+1:]...)
} else {
pktmap[outs[r]] = rpkts
}
}
}

func (d *dispatcherPool) Copy(w io.Writer, f fetchFunc) error {
for {
b, err := f()
if err != nil {
return err
}

pkts := []dispatchPacket{}
for len(b) > 0 {
pkt := b
if len(b) > dispatchPacketBytes {
pkt = pkt[:dispatchPacketBytes]
b = b[dispatchPacketBytes:]
} else {
b = nil
}
pkts = append(pkts, dispatchPacket{pkt, w})
}

d.mu.Lock()
d.q = append(d.q, pkts...)
d.mu.Unlock()
}
}

type dispatcherImmediate struct{}

func newDispatcherImmediate() dispatcher {
return &dispatcherImmediate{}
}

func (d *dispatcherImmediate) Copy(w io.Writer, f fetchFunc) error {
for {
b, err := f()
if err != nil {
return err
}
if _, err := w.Write(b); err != nil {
return err
}
}
}
@ -1,108 +0,0 @@
#!/bin/bash

PROCFILE="tools/local-tester/Procfile"
HTTPFAIL=(127.0.0.1:11180 127.0.0.1:22280 127.0.0.1:33380)

function wait_time {
expr $RANDOM % 10 + 1
}

function cycle {
for a; do
echo "cycling $a"
goreman -f $PROCFILE run stop $a || echo "could not stop $a"
sleep `wait_time`s
goreman -f $PROCFILE run restart $a || echo "could not restart $a"
done
}

function cycle_members {
cycle etcd1 etcd2 etcd3
}
function cycle_pbridge {
cycle pbridge1 pbridge2 pbridge3
}
function cycle_cbridge {
cycle cbridge1 cbridge2 cbridge3
}
function cycle_stresser {
cycle stress-put
}

function kill_maj {
idx="etcd"`expr $RANDOM % 3 + 1`
idx2="$idx"
while [ "$idx" == "$idx2" ]; do
idx2="etcd"`expr $RANDOM % 3 + 1`
done
echo "kill majority $idx $idx2"
goreman -f $PROCFILE run stop $idx || echo "could not stop $idx"
goreman -f $PROCFILE run stop $idx2 || echo "could not stop $idx2"
sleep `wait_time`s
goreman -f $PROCFILE run restart $idx || echo "could not restart $idx"
goreman -f $PROCFILE run restart $idx2 || echo "could not restart $idx2"
}

function kill_all {
for a in etcd1 etcd2 etcd3; do
goreman -f $PROCFILE run stop $a || echo "could not stop $a"
done
sleep `wait_time`s
for a in etcd1 etcd2 etcd3; do
goreman -f $PROCFILE run restart $a || echo "could not restart $a"
done
}

function rand_fp {
echo "$FAILPOINTS" | sed `expr $RANDOM % $NUMFPS + 1`"q;d"
}

# fp_activate <http> <fppath> <value>
function fp_activate {
curl "$1"/"$2" -XPUT -d "$3" >/dev/null 2>&1
}

function fp_rand_single {
fp=`rand_fp`
fp_activate ${HTTPFAIL[`expr $RANDOM % ${#HTTPFAIL[@]}`]} $fp 'panic("'$fp'")'
sleep `wait_time`s
}

function fp_rand_all {
fp=`rand_fp`
for a in `seq ${#HTTPFAIL[@]}`; do fp_activate ${HTTPFAIL[$a]} "$fp" 'panic("'$fp'")'; done
sleep `wait_time`s
}

function fp_all_rand_fire {
for fp in $FAILPOINTS; do
for url in "${HTTPFAIL[@]}"; do
fp_activate "$url" "$fp" '0.5%panic("0.5%'$fp'")'
done
done
}

function choose {
fault=${FAULTS[`expr $RANDOM % ${#FAULTS[@]}`]}
echo $fault
$fault || echo "failed: $fault"
}

sleep 2s

FAULTS=(cycle_members kill_maj kill_all cycle_pbridge cycle_cbridge cycle_stresser)

# add failpoint faults if available
FAILPOINTS=`curl http://"${HTTPFAIL[0]}" 2>/dev/null | cut -f1 -d'=' | grep -v "^$"`
NUMFPS=`echo $(echo "$FAILPOINTS" | wc -l)`
if [ "$NUMFPS" != "0" ]; then
FAULTS+=(fp_rand_single)
FAULTS+=(fp_rand_all)
fi

while [ 1 ]; do
choose
# start any nodes that have been killed by failpoints
for a in etcd1 etcd2 etcd3; do goreman -f $PROCFILE run start $a; done
fp_all_rand_fire
done