mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
etcdserver: fix panic when checking IsLearner of removed member
Previously, calling s.IsLearner() when the local node is no longer a member panics. There was an attempt to fix this by first checking IsMemberExist(), but this is not a correct fix because the member could be removed between the two calls. Instead of panicking when the member was removed, IsLearner() should return false. A node which is not a member is also not a learner. There was a similar concurrency bug when accessing the IsLearner property of a member, which will panic with a nil pointer access error if the member is removed between the IsMemberExist() and Member() calls. Signed-off-by: Jan Schär <jan@monogon.tech>
This commit is contained in:
parent
5704c6148d
commit
605abca29d
@ -783,11 +783,7 @@ func (c *RaftCluster) IsLocalMemberLearner() bool {
|
|||||||
defer c.Unlock()
|
defer c.Unlock()
|
||||||
localMember, ok := c.members[c.localID]
|
localMember, ok := c.members[c.localID]
|
||||||
if !ok {
|
if !ok {
|
||||||
c.lg.Panic(
|
return false
|
||||||
"failed to find local ID in cluster members",
|
|
||||||
zap.String("cluster-id", c.cid.String()),
|
|
||||||
zap.String("local-member-id", c.localID.String()),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
return localMember.IsLearner
|
return localMember.IsLearner
|
||||||
}
|
}
|
||||||
@ -816,6 +812,9 @@ func (c *RaftCluster) SetDowngradeInfo(d *serverversion.DowngradeInfo, shouldApp
|
|||||||
|
|
||||||
// IsMemberExist returns if the member with the given id exists in cluster.
|
// IsMemberExist returns if the member with the given id exists in cluster.
|
||||||
func (c *RaftCluster) IsMemberExist(id types.ID) bool {
|
func (c *RaftCluster) IsMemberExist(id types.ID) bool {
|
||||||
|
// gofail: var sleepAfterIsMemberExist struct{}
|
||||||
|
// defer time.Sleep(time.Second)
|
||||||
|
|
||||||
c.Lock()
|
c.Lock()
|
||||||
defer c.Unlock()
|
defer c.Unlock()
|
||||||
_, ok := c.members[id]
|
_, ok := c.members[id]
|
||||||
|
|||||||
@ -49,7 +49,7 @@ func newUnaryInterceptor(s *etcdserver.EtcdServer) grpc.UnaryServerInterceptor {
|
|||||||
return nil, rpctypes.ErrGRPCNotCapable
|
return nil, rpctypes.ErrGRPCNotCapable
|
||||||
}
|
}
|
||||||
|
|
||||||
if s.IsMemberExist(s.MemberID()) && s.IsLearner() && !isRPCSupportedForLearner(req) {
|
if s.IsLearner() && !isRPCSupportedForLearner(req) {
|
||||||
return nil, rpctypes.ErrGRPCNotSupportedForLearner
|
return nil, rpctypes.ErrGRPCNotSupportedForLearner
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -218,7 +218,7 @@ func newStreamInterceptor(s *etcdserver.EtcdServer) grpc.StreamServerInterceptor
|
|||||||
return rpctypes.ErrGRPCNotCapable
|
return rpctypes.ErrGRPCNotCapable
|
||||||
}
|
}
|
||||||
|
|
||||||
if s.IsMemberExist(s.MemberID()) && s.IsLearner() && info.FullMethod != snapshotMethod { // learner does not support stream RPC except Snapshot
|
if s.IsLearner() && info.FullMethod != snapshotMethod { // learner does not support stream RPC except Snapshot
|
||||||
return rpctypes.ErrGRPCNotSupportedForLearner
|
return rpctypes.ErrGRPCNotSupportedForLearner
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1224,7 +1224,8 @@ func (s *EtcdServer) isLeader() bool {
|
|||||||
|
|
||||||
// MoveLeader transfers the leader to the given transferee.
|
// MoveLeader transfers the leader to the given transferee.
|
||||||
func (s *EtcdServer) MoveLeader(ctx context.Context, lead, transferee uint64) error {
|
func (s *EtcdServer) MoveLeader(ctx context.Context, lead, transferee uint64) error {
|
||||||
if !s.cluster.IsMemberExist(types.ID(transferee)) || s.cluster.Member(types.ID(transferee)).IsLearner {
|
member := s.cluster.Member(types.ID(transferee))
|
||||||
|
if member == nil || member.IsLearner {
|
||||||
return errors.ErrBadLeaderTransferee
|
return errors.ErrBadLeaderTransferee
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1593,9 +1594,9 @@ func (s *EtcdServer) mayRemoveMember(id types.ID) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
lg := s.Logger()
|
lg := s.Logger()
|
||||||
isLearner := s.cluster.IsMemberExist(id) && s.cluster.Member(id).IsLearner
|
member := s.cluster.Member(id)
|
||||||
// no need to check quorum when removing non-voting member
|
// no need to check quorum when removing non-voting member
|
||||||
if isLearner {
|
if member != nil && member.IsLearner {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -518,3 +518,51 @@ func TestSpeedyTerminate(t *testing.T) {
|
|||||||
case <-donec:
|
case <-donec:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestConcurrentRemoveMember demonstrated a panic in mayRemoveMember with
|
||||||
|
// concurrent calls to MemberRemove. To reliably reproduce the panic, a delay
|
||||||
|
// needed to be injected in IsMemberExist, which is done using a failpoint.
|
||||||
|
// After fixing the bug, IsMemberExist is no longer called by mayRemoveMember.
|
||||||
|
func TestConcurrentRemoveMember(t *testing.T) {
|
||||||
|
integration.BeforeTest(t, integration.WithFailpoint("sleepAfterIsMemberExist", `return`))
|
||||||
|
c := integration.NewCluster(t, &integration.ClusterConfig{Size: 1})
|
||||||
|
defer c.Terminate(t)
|
||||||
|
|
||||||
|
addResp, err := c.Members[0].Client.MemberAddAsLearner(context.Background(), []string{"http://localhost:123"})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
removeID := addResp.Member.ID
|
||||||
|
done := make(chan struct{})
|
||||||
|
go func() {
|
||||||
|
time.Sleep(time.Second / 2)
|
||||||
|
c.Members[0].Client.MemberRemove(context.Background(), removeID)
|
||||||
|
close(done)
|
||||||
|
}()
|
||||||
|
if _, err := c.Members[0].Client.MemberRemove(context.Background(), removeID); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
<-done
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConcurrentMoveLeader(t *testing.T) {
|
||||||
|
integration.BeforeTest(t, integration.WithFailpoint("sleepAfterIsMemberExist", `return`))
|
||||||
|
c := integration.NewCluster(t, &integration.ClusterConfig{Size: 1})
|
||||||
|
defer c.Terminate(t)
|
||||||
|
|
||||||
|
addResp, err := c.Members[0].Client.MemberAddAsLearner(context.Background(), []string{"http://localhost:123"})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
removeID := addResp.Member.ID
|
||||||
|
done := make(chan struct{})
|
||||||
|
go func() {
|
||||||
|
time.Sleep(time.Second / 2)
|
||||||
|
c.Members[0].Client.MoveLeader(context.Background(), removeID)
|
||||||
|
close(done)
|
||||||
|
}()
|
||||||
|
if _, err := c.Members[0].Client.MemberRemove(context.Background(), removeID); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
<-done
|
||||||
|
}
|
||||||
|
|||||||
@ -54,7 +54,7 @@ install-gofail: $(GOPATH)/bin/gofail
|
|||||||
|
|
||||||
.PHONY: gofail-enable
|
.PHONY: gofail-enable
|
||||||
gofail-enable: $(GOPATH)/bin/gofail
|
gofail-enable: $(GOPATH)/bin/gofail
|
||||||
$(GOPATH)/bin/gofail enable server/etcdserver/ server/lease/leasehttp server/storage/backend/ server/storage/mvcc/ server/storage/wal/ server/etcdserver/api/v3rpc/
|
$(GOPATH)/bin/gofail enable server/etcdserver/ server/lease/leasehttp server/storage/backend/ server/storage/mvcc/ server/storage/wal/ server/etcdserver/api/v3rpc/ server/etcdserver/api/membership/
|
||||||
cd ./server && go get go.etcd.io/gofail@${GOFAIL_VERSION}
|
cd ./server && go get go.etcd.io/gofail@${GOFAIL_VERSION}
|
||||||
cd ./etcdutl && go get go.etcd.io/gofail@${GOFAIL_VERSION}
|
cd ./etcdutl && go get go.etcd.io/gofail@${GOFAIL_VERSION}
|
||||||
cd ./etcdctl && go get go.etcd.io/gofail@${GOFAIL_VERSION}
|
cd ./etcdctl && go get go.etcd.io/gofail@${GOFAIL_VERSION}
|
||||||
@ -62,7 +62,7 @@ gofail-enable: $(GOPATH)/bin/gofail
|
|||||||
|
|
||||||
.PHONY: gofail-disable
|
.PHONY: gofail-disable
|
||||||
gofail-disable: $(GOPATH)/bin/gofail
|
gofail-disable: $(GOPATH)/bin/gofail
|
||||||
$(GOPATH)/bin/gofail disable server/etcdserver/ server/lease/leasehttp server/storage/backend/ server/storage/mvcc/ server/storage/wal/ server/etcdserver/api/v3rpc/
|
$(GOPATH)/bin/gofail disable server/etcdserver/ server/lease/leasehttp server/storage/backend/ server/storage/mvcc/ server/storage/wal/ server/etcdserver/api/v3rpc/ server/etcdserver/api/membership/
|
||||||
cd ./server && go mod tidy
|
cd ./server && go mod tidy
|
||||||
cd ./etcdutl && go mod tidy
|
cd ./etcdutl && go mod tidy
|
||||||
cd ./etcdctl && go mod tidy
|
cd ./etcdctl && go mod tidy
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user