integration: test member removal which breaks active quorum is rejected

This commit is contained in:
Anthony Romano 2016-08-10 16:30:03 -07:00
parent a1ce07a321
commit e742ff331f
2 changed files with 54 additions and 1 deletions

View File

@ -276,12 +276,18 @@ func (c *cluster) AddMember(t *testing.T) {
}
func (c *cluster) RemoveMember(t *testing.T, id uint64) {
if err := c.removeMember(t, id); err != nil {
t.Fatal(err)
}
}
func (c *cluster) removeMember(t *testing.T, id uint64) error {
// send remove request to the cluster
cc := MustNewHTTPClient(t, c.URLs(), c.cfg.ClientTLS)
ma := client.NewMembersAPI(cc)
ctx, cancel := context.WithTimeout(context.Background(), requestTimeout)
if err := ma.Remove(ctx, types.ID(id).String()); err != nil {
t.Fatalf("unexpected remove error %v", err)
return err
}
cancel()
newMembers := make([]*member, 0)
@ -302,6 +308,7 @@ func (c *cluster) RemoveMember(t *testing.T, id uint64) {
}
c.Members = newMembers
c.waitMembersMatch(t, c.HTTPMembers())
return nil
}
func (c *cluster) Terminate(t *testing.T) {

View File

@ -391,6 +391,52 @@ func TestRejectUnhealthyAdd(t *testing.T) {
}
}
// TestRejectUnhealthyRemove ensures an unhealthy cluster rejects removing members
// if quorum will be lost.
func TestRejectUnhealthyRemove(t *testing.T) {
defer testutil.AfterTest(t)
c := NewCluster(t, 5)
for _, m := range c.Members {
m.ServerConfig.StrictReconfigCheck = true
}
c.Launch(t)
defer c.Terminate(t)
// make cluster unhealthy and wait for downed peer; (3 up, 2 down)
c.Members[0].Stop(t)
c.Members[1].Stop(t)
c.WaitLeader(t)
// reject remove active member since (3,2)-(1,0) => (2,2) lacks quorum
err := c.removeMember(t, uint64(c.Members[2].s.ID()))
if err == nil {
t.Fatalf("should reject quorum breaking remove")
}
// TODO: client should return more descriptive error codes for internal errors
if !strings.Contains(err.Error(), "has no leader") {
t.Errorf("unexpected error (%v)", err)
}
// member stopped after launch; wait for missing heartbeats
time.Sleep(time.Duration(electionTicks * int(tickDuration)))
// permit remove dead member since (3,2) - (0,1) => (3,1) has quorum
if err = c.removeMember(t, uint64(c.Members[0].s.ID())); err != nil {
t.Fatalf("should accept removing down member")
}
// bring cluster to (4,1)
c.Members[0].Restart(t)
// restarted member must be connected for a HealthInterval before remove is accepted
time.Sleep((3 * etcdserver.HealthInterval) / 2)
// accept remove member since (4,1)-(1,0) => (3,1) has quorum
if err = c.removeMember(t, uint64(c.Members[0].s.ID())); err != nil {
t.Fatalf("expected to remove member, got error %v", err)
}
}
// clusterMustProgress ensures that cluster can make progress. It creates
// a random key first, and check the new key could be got from all client urls
// of the cluster.