mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
etcdserver: fix panic when checking IsLearner of removed member
Previously, calling s.IsLearner() when the local node is no longer a member panics. There was an attempt to fix this by first checking IsMemberExist(), but this is not a correct fix because the member could be removed between the two calls. Instead of panicking when the member was removed, IsLearner() should return false. A node which is not a member is also not a learner. There was a similar concurrency bug when accessing the IsLearner property of a member, which will panic with a nil pointer access error if the member is removed between the IsMemberExist() and Member() calls. Signed-off-by: Jan Schär <jan@monogon.tech>
This commit is contained in:
parent
5704c6148d
commit
605abca29d
@ -783,11 +783,7 @@ func (c *RaftCluster) IsLocalMemberLearner() bool {
|
||||
defer c.Unlock()
|
||||
localMember, ok := c.members[c.localID]
|
||||
if !ok {
|
||||
c.lg.Panic(
|
||||
"failed to find local ID in cluster members",
|
||||
zap.String("cluster-id", c.cid.String()),
|
||||
zap.String("local-member-id", c.localID.String()),
|
||||
)
|
||||
return false
|
||||
}
|
||||
return localMember.IsLearner
|
||||
}
|
||||
@ -816,6 +812,9 @@ func (c *RaftCluster) SetDowngradeInfo(d *serverversion.DowngradeInfo, shouldApp
|
||||
|
||||
// IsMemberExist returns if the member with the given id exists in cluster.
|
||||
func (c *RaftCluster) IsMemberExist(id types.ID) bool {
|
||||
// gofail: var sleepAfterIsMemberExist struct{}
|
||||
// defer time.Sleep(time.Second)
|
||||
|
||||
c.Lock()
|
||||
defer c.Unlock()
|
||||
_, ok := c.members[id]
|
||||
|
@ -49,7 +49,7 @@ func newUnaryInterceptor(s *etcdserver.EtcdServer) grpc.UnaryServerInterceptor {
|
||||
return nil, rpctypes.ErrGRPCNotCapable
|
||||
}
|
||||
|
||||
if s.IsMemberExist(s.MemberID()) && s.IsLearner() && !isRPCSupportedForLearner(req) {
|
||||
if s.IsLearner() && !isRPCSupportedForLearner(req) {
|
||||
return nil, rpctypes.ErrGRPCNotSupportedForLearner
|
||||
}
|
||||
|
||||
@ -218,7 +218,7 @@ func newStreamInterceptor(s *etcdserver.EtcdServer) grpc.StreamServerInterceptor
|
||||
return rpctypes.ErrGRPCNotCapable
|
||||
}
|
||||
|
||||
if s.IsMemberExist(s.MemberID()) && s.IsLearner() && info.FullMethod != snapshotMethod { // learner does not support stream RPC except Snapshot
|
||||
if s.IsLearner() && info.FullMethod != snapshotMethod { // learner does not support stream RPC except Snapshot
|
||||
return rpctypes.ErrGRPCNotSupportedForLearner
|
||||
}
|
||||
|
||||
|
@ -1224,7 +1224,8 @@ func (s *EtcdServer) isLeader() bool {
|
||||
|
||||
// MoveLeader transfers the leader to the given transferee.
|
||||
func (s *EtcdServer) MoveLeader(ctx context.Context, lead, transferee uint64) error {
|
||||
if !s.cluster.IsMemberExist(types.ID(transferee)) || s.cluster.Member(types.ID(transferee)).IsLearner {
|
||||
member := s.cluster.Member(types.ID(transferee))
|
||||
if member == nil || member.IsLearner {
|
||||
return errors.ErrBadLeaderTransferee
|
||||
}
|
||||
|
||||
@ -1593,9 +1594,9 @@ func (s *EtcdServer) mayRemoveMember(id types.ID) error {
|
||||
}
|
||||
|
||||
lg := s.Logger()
|
||||
isLearner := s.cluster.IsMemberExist(id) && s.cluster.Member(id).IsLearner
|
||||
member := s.cluster.Member(id)
|
||||
// no need to check quorum when removing non-voting member
|
||||
if isLearner {
|
||||
if member != nil && member.IsLearner {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -518,3 +518,51 @@ func TestSpeedyTerminate(t *testing.T) {
|
||||
case <-donec:
|
||||
}
|
||||
}
|
||||
|
||||
// TestConcurrentRemoveMember demonstrated a panic in mayRemoveMember with
|
||||
// concurrent calls to MemberRemove. To reliably reproduce the panic, a delay
|
||||
// needed to be injected in IsMemberExist, which is done using a failpoint.
|
||||
// After fixing the bug, IsMemberExist is no longer called by mayRemoveMember.
|
||||
func TestConcurrentRemoveMember(t *testing.T) {
|
||||
integration.BeforeTest(t, integration.WithFailpoint("sleepAfterIsMemberExist", `return`))
|
||||
c := integration.NewCluster(t, &integration.ClusterConfig{Size: 1})
|
||||
defer c.Terminate(t)
|
||||
|
||||
addResp, err := c.Members[0].Client.MemberAddAsLearner(context.Background(), []string{"http://localhost:123"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
removeID := addResp.Member.ID
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
time.Sleep(time.Second / 2)
|
||||
c.Members[0].Client.MemberRemove(context.Background(), removeID)
|
||||
close(done)
|
||||
}()
|
||||
if _, err := c.Members[0].Client.MemberRemove(context.Background(), removeID); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
<-done
|
||||
}
|
||||
|
||||
func TestConcurrentMoveLeader(t *testing.T) {
|
||||
integration.BeforeTest(t, integration.WithFailpoint("sleepAfterIsMemberExist", `return`))
|
||||
c := integration.NewCluster(t, &integration.ClusterConfig{Size: 1})
|
||||
defer c.Terminate(t)
|
||||
|
||||
addResp, err := c.Members[0].Client.MemberAddAsLearner(context.Background(), []string{"http://localhost:123"})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
removeID := addResp.Member.ID
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
time.Sleep(time.Second / 2)
|
||||
c.Members[0].Client.MoveLeader(context.Background(), removeID)
|
||||
close(done)
|
||||
}()
|
||||
if _, err := c.Members[0].Client.MemberRemove(context.Background(), removeID); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
<-done
|
||||
}
|
||||
|
@ -54,7 +54,7 @@ install-gofail: $(GOPATH)/bin/gofail
|
||||
|
||||
.PHONY: gofail-enable
|
||||
gofail-enable: $(GOPATH)/bin/gofail
|
||||
$(GOPATH)/bin/gofail enable server/etcdserver/ server/lease/leasehttp server/storage/backend/ server/storage/mvcc/ server/storage/wal/ server/etcdserver/api/v3rpc/
|
||||
$(GOPATH)/bin/gofail enable server/etcdserver/ server/lease/leasehttp server/storage/backend/ server/storage/mvcc/ server/storage/wal/ server/etcdserver/api/v3rpc/ server/etcdserver/api/membership/
|
||||
cd ./server && go get go.etcd.io/gofail@${GOFAIL_VERSION}
|
||||
cd ./etcdutl && go get go.etcd.io/gofail@${GOFAIL_VERSION}
|
||||
cd ./etcdctl && go get go.etcd.io/gofail@${GOFAIL_VERSION}
|
||||
@ -62,7 +62,7 @@ gofail-enable: $(GOPATH)/bin/gofail
|
||||
|
||||
.PHONY: gofail-disable
|
||||
gofail-disable: $(GOPATH)/bin/gofail
|
||||
$(GOPATH)/bin/gofail disable server/etcdserver/ server/lease/leasehttp server/storage/backend/ server/storage/mvcc/ server/storage/wal/ server/etcdserver/api/v3rpc/
|
||||
$(GOPATH)/bin/gofail disable server/etcdserver/ server/lease/leasehttp server/storage/backend/ server/storage/mvcc/ server/storage/wal/ server/etcdserver/api/v3rpc/ server/etcdserver/api/membership/
|
||||
cd ./server && go mod tidy
|
||||
cd ./etcdutl && go mod tidy
|
||||
cd ./etcdctl && go mod tidy
|
||||
|
Loading…
x
Reference in New Issue
Block a user