From 60c8dbe0c98194372adfa11e26b44ecf0096390c Mon Sep 17 00:00:00 2001 From: Xiang Li Date: Tue, 15 Jul 2014 14:46:45 -0700 Subject: [PATCH] etcd: rewrite kill_leader and kill_random test --- etcd/etcd_functional_test.go | 105 +++++++++++++++++++++++++++ tests/functional/kill_leader_test.go | 52 ------------- tests/functional/kill_random_test.go | 75 ------------------- 3 files changed, 105 insertions(+), 127 deletions(-) create mode 100644 etcd/etcd_functional_test.go delete mode 100644 tests/functional/kill_random_test.go diff --git a/etcd/etcd_functional_test.go b/etcd/etcd_functional_test.go new file mode 100644 index 000000000..184204030 --- /dev/null +++ b/etcd/etcd_functional_test.go @@ -0,0 +1,105 @@ +package etcd + +import ( + "math/rand" + "testing" + "time" +) + +func TestKillLeader(t *testing.T) { + tests := []int{3, 5, 9, 11} + + for i, tt := range tests { + es, hs := buildCluster(tt, false) + waitCluster(t, es) + waitLeader(es) + + lead := es[0].node.Leader() + es[lead].Stop() + + time.Sleep(es[0].tickDuration * defaultElection * 2) + + waitLeader(es) + if es[1].node.Leader() == 0 { + t.Errorf("#%d: lead = %d, want not 0", i, es[1].node.Leader()) + } + + for i := range es { + es[len(es)-i-1].Stop() + } + for i := range hs { + hs[len(hs)-i-1].Close() + } + } + afterTest(t) +} + +func TestRandomKill(t *testing.T) { + tests := []int{3, 5, 9, 11} + + for _, tt := range tests { + es, hs := buildCluster(tt, false) + waitCluster(t, es) + waitLeader(es) + + toKill := make(map[int64]struct{}) + for len(toKill) != tt/2-1 { + toKill[rand.Int63n(int64(tt))] = struct{}{} + } + for k := range toKill { + es[k].Stop() + } + + time.Sleep(es[0].tickDuration * defaultElection * 2) + + waitLeader(es) + + for i := range es { + es[len(es)-i-1].Stop() + } + for i := range hs { + hs[len(hs)-i-1].Close() + } + } + afterTest(t) +} + +type leadterm struct { + lead int64 + term int64 +} + +func waitLeader(es []*Server) { + for { + ls := make([]leadterm, 0, len(es)) + for i := range es { + switch es[i].mode { + case participant: + ls = append(ls, reportLead(es[i])) + case standby: + //TODO(xiangli) add standby support + case stop: + } + } + if isSameLead(ls) { + return + } + time.Sleep(es[0].tickDuration * defaultElection) + } +} + +func reportLead(s *Server) leadterm { + return leadterm{s.node.Leader(), s.node.Term()} +} + +func isSameLead(ls []leadterm) bool { + m := make(map[leadterm]int) + for i := range ls { + m[ls[i]] = m[ls[i]] + 1 + } + if len(m) == 1 { + return true + } + // todo(xiangli): printout the current cluster status for debugging.... + return false +} diff --git a/tests/functional/kill_leader_test.go b/tests/functional/kill_leader_test.go index 7c18d46be..80f2cee74 100644 --- a/tests/functional/kill_leader_test.go +++ b/tests/functional/kill_leader_test.go @@ -15,58 +15,6 @@ import ( "github.com/coreos/etcd/third_party/github.com/stretchr/testify/assert" ) -// This test will kill the current leader and wait for the etcd cluster to elect a new leader for 200 times. -// It will print out the election time and the average election time. -func TestKillLeader(t *testing.T) { - procAttr := new(os.ProcAttr) - procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr} - - clusterSize := 3 - argGroup, etcds, err := CreateCluster(clusterSize, procAttr, false) - if err != nil { - t.Fatal("cannot create cluster") - } - defer DestroyCluster(etcds) - - stop := make(chan bool) - leaderChan := make(chan string, 1) - all := make(chan bool, 1) - - time.Sleep(time.Second) - - go Monitor(clusterSize, 1, leaderChan, all, stop) - - var totalTime time.Duration - - leader := "http://127.0.0.1:7001" - - for i := 0; i < clusterSize; i++ { - fmt.Println("leader is ", leader) - port, _ := strconv.Atoi(strings.Split(leader, ":")[2]) - num := port - 7001 - fmt.Println("kill server ", num) - etcds[num].Kill() - etcds[num].Release() - - start := time.Now() - for { - newLeader := <-leaderChan - if newLeader != leader { - leader = newLeader - break - } - } - take := time.Now().Sub(start) - - totalTime += take - avgTime := totalTime / (time.Duration)(i+1) - fmt.Println("Total time:", totalTime, "; Avg time:", avgTime) - - etcds[num], err = os.StartProcess(EtcdBinPath, argGroup[num], procAttr) - } - stop <- true -} - // This test will kill the current leader and wait for the etcd cluster to elect a new leader for 200 times. // It will print out the election time and the average election time. // It runs in a cluster with standby nodes. diff --git a/tests/functional/kill_random_test.go b/tests/functional/kill_random_test.go deleted file mode 100644 index f8af96e23..000000000 --- a/tests/functional/kill_random_test.go +++ /dev/null @@ -1,75 +0,0 @@ -package test - -import ( - "fmt" - "math/rand" - "os" - "testing" - "time" -) - -// TestKillRandom kills random peers in the cluster and -// restart them after all other peers agree on the same leader -func TestKillRandom(t *testing.T) { - procAttr := new(os.ProcAttr) - procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr} - - clusterSize := 9 - argGroup, etcds, err := CreateCluster(clusterSize, procAttr, false) - - if err != nil { - t.Fatal("cannot create cluster") - } - - defer DestroyCluster(etcds) - - stop := make(chan bool) - leaderChan := make(chan string, 1) - all := make(chan bool, 1) - - time.Sleep(3 * time.Second) - - go Monitor(clusterSize, 4, leaderChan, all, stop) - - toKill := make(map[int]bool) - - for i := 0; i < 20; i++ { - fmt.Printf("TestKillRandom Round[%d/20]\n", i) - - j := 0 - for { - - r := rand.Int31n(9) - if _, ok := toKill[int(r)]; !ok { - j++ - toKill[int(r)] = true - } - - if j > 3 { - break - } - - } - - for num := range toKill { - err := etcds[num].Kill() - if err != nil { - panic(err) - } - etcds[num].Wait() - } - - time.Sleep(1 * time.Second) - - <-leaderChan - - for num := range toKill { - etcds[num], err = os.StartProcess(EtcdBinPath, argGroup[num], procAttr) - } - - toKill = make(map[int]bool) - <-all - } - - stop <- true -}