Merge pull request #13932 from ahrtr/lease_renew_linearizable_3.5

[3.5] Support linearizable renew lease

Commit 719082e4fc
@@ -75,6 +75,7 @@ var (
 	ErrGRPCTimeout                    = status.New(codes.Unavailable, "etcdserver: request timed out").Err()
 	ErrGRPCTimeoutDueToLeaderFail     = status.New(codes.Unavailable, "etcdserver: request timed out, possibly due to previous leader failure").Err()
 	ErrGRPCTimeoutDueToConnectionLost = status.New(codes.Unavailable, "etcdserver: request timed out, possibly due to connection lost").Err()
+	ErrGRPCTimeoutWaitAppliedIndex    = status.New(codes.Unavailable, "etcdserver: request timed out, waiting for the applied index took too long").Err()
 	ErrGRPCUnhealthy                  = status.New(codes.Unavailable, "etcdserver: unhealthy cluster").Err()
 	ErrGRPCCorrupt                    = status.New(codes.DataLoss, "etcdserver: corrupt cluster").Err()
 	ErrGPRCNotSupportedForLearner     = status.New(codes.Unavailable, "etcdserver: rpc not supported for learner").Err()
@@ -208,6 +209,7 @@ var (
 	ErrTimeout                    = Error(ErrGRPCTimeout)
 	ErrTimeoutDueToLeaderFail     = Error(ErrGRPCTimeoutDueToLeaderFail)
 	ErrTimeoutDueToConnectionLost = Error(ErrGRPCTimeoutDueToConnectionLost)
+	ErrTimeoutWaitAppliedIndex    = Error(ErrGRPCTimeoutWaitAppliedIndex)
 	ErrUnhealthy                  = Error(ErrGRPCUnhealthy)
 	ErrCorrupt                    = Error(ErrGRPCCorrupt)
 	ErrBadLeaderTransferee        = Error(ErrGRPCBadLeaderTransferee)
@@ -53,6 +53,7 @@ var toGRPCErrorMap = map[error]error{
 	etcdserver.ErrTimeout:                    rpctypes.ErrGRPCTimeout,
 	etcdserver.ErrTimeoutDueToLeaderFail:     rpctypes.ErrGRPCTimeoutDueToLeaderFail,
 	etcdserver.ErrTimeoutDueToConnectionLost: rpctypes.ErrGRPCTimeoutDueToConnectionLost,
+	etcdserver.ErrTimeoutWaitAppliedIndex:    rpctypes.ErrGRPCTimeoutWaitAppliedIndex,
 	etcdserver.ErrUnhealthy:                  rpctypes.ErrGRPCUnhealthy,
 	etcdserver.ErrKeyNotFound:                rpctypes.ErrGRPCKeyNotFound,
 	etcdserver.ErrCorrupt:                    rpctypes.ErrGRPCCorrupt,
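toGRPCErrorMap is the translation table consulted when an internal etcdserver error has to cross the RPC boundary; the new ErrTimeoutWaitAppliedIndex entry is what lets clients observe the wait-for-applied-index timeout as an ordinary Unavailable gRPC status. Below is a minimal, self-contained sketch of that lookup pattern (simplified names, not etcd's actual helper), assuming google.golang.org/grpc is available:

package main

import (
	"errors"
	"fmt"

	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

// Hypothetical internal error, mirroring etcdserver.ErrTimeoutWaitAppliedIndex.
var errTimeoutWaitAppliedIndex = errors.New("request timed out, waiting for the applied index took too long")

// A tiny stand-in for toGRPCErrorMap: internal error -> gRPC status error.
var toGRPC = map[error]error{
	errTimeoutWaitAppliedIndex: status.New(codes.Unavailable, errTimeoutWaitAppliedIndex.Error()).Err(),
}

// togRPCError returns the mapped gRPC error, or wraps unknown errors.
func togRPCError(err error) error {
	if g, ok := toGRPC[err]; ok {
		return g
	}
	return status.New(codes.Unknown, err.Error()).Err()
}

func main() {
	err := togRPCError(errTimeoutWaitAppliedIndex)
	fmt.Println(status.Code(err)) // Unavailable
}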
@@ -27,6 +27,7 @@ var (
 	ErrTimeoutDueToLeaderFail     = errors.New("etcdserver: request timed out, possibly due to previous leader failure")
 	ErrTimeoutDueToConnectionLost = errors.New("etcdserver: request timed out, possibly due to connection lost")
 	ErrTimeoutLeaderTransfer      = errors.New("etcdserver: request timed out, leader transfer took too long")
+	ErrTimeoutWaitAppliedIndex    = errors.New("etcdserver: request timed out, waiting for the applied index took too long")
 	ErrLeaderChanged              = errors.New("etcdserver: leader changed")
 	ErrNotEnoughStartedMembers    = errors.New("etcdserver: re-configuration failed due to not enough started members")
 	ErrLearnerNotReady            = errors.New("etcdserver: can only promote a learner member which is in sync with leader")
@@ -2036,3 +2036,59 @@ func (s *sendMsgAppRespTransporter) Send(m []raftpb.Message) {
 	}
 	s.sendC <- send
 }
+
+func TestWaitAppliedIndex(t *testing.T) {
+	cases := []struct {
+		name           string
+		appliedIndex   uint64
+		committedIndex uint64
+		action         func(s *EtcdServer)
+		ExpectedError  error
+	}{
+		{
+			name:           "The applied Id is already equal to the commitId",
+			appliedIndex:   10,
+			committedIndex: 10,
+			action: func(s *EtcdServer) {
+				s.applyWait.Trigger(10)
+			},
+			ExpectedError: nil,
+		},
+		{
+			name:           "The etcd server has already stopped",
+			appliedIndex:   10,
+			committedIndex: 12,
+			action: func(s *EtcdServer) {
+				s.stopping <- struct{}{}
+			},
+			ExpectedError: ErrStopped,
+		},
+		{
+			name:           "Timed out waiting for the applied index",
+			appliedIndex:   10,
+			committedIndex: 12,
+			action:         nil,
+			ExpectedError:  ErrTimeoutWaitAppliedIndex,
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			s := &EtcdServer{
+				appliedIndex:   tc.appliedIndex,
+				committedIndex: tc.committedIndex,
+				stopping:       make(chan struct{}, 1),
+				applyWait:      wait.NewTimeList(),
+			}
+
+			if tc.action != nil {
+				go tc.action(s)
+			}
+
+			err := s.waitAppliedIndex()
+
+			if err != tc.ExpectedError {
+				t.Errorf("Unexpected error, want (%v), got (%v)", tc.ExpectedError, err)
+			}
+		})
+	}
+}
@@ -45,6 +45,10 @@ const (
 	maxGapBetweenApplyAndCommitIndex = 5000
 	traceThreshold                   = 100 * time.Millisecond
 	readIndexRetryTime               = 500 * time.Millisecond
+
+	// The timeout for the node to catch up its applied index, and is used in
+	// lease related operations, such as LeaseRenew and LeaseTimeToLive.
+	applyTimeout = time.Second
 )

 type RaftKV interface {
@@ -271,6 +275,18 @@ func (s *EtcdServer) LeaseGrant(ctx context.Context, r *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
 	return resp.(*pb.LeaseGrantResponse), nil
 }

+func (s *EtcdServer) waitAppliedIndex() error {
+	select {
+	case <-s.ApplyWait():
+	case <-s.stopping:
+		return ErrStopped
+	case <-time.After(applyTimeout):
+		return ErrTimeoutWaitAppliedIndex
+	}
+
+	return nil
+}
+
 func (s *EtcdServer) LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
 	resp, err := s.raftRequestOnce(ctx, pb.InternalRaftRequest{LeaseRevoke: r})
 	if err != nil {
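waitAppliedIndex is the core of the change: before the leader answers a lease request from its local state, it blocks until the apply loop catches up (s.ApplyWait() yields a channel that closes once the applied index reaches the committed index observed at call time), bailing out early on shutdown or after applyTimeout. The same select pattern can be sketched standalone; the names ready and stopping and the error values here are hypothetical stand-ins for the server fields in the diff:

package main

import (
	"errors"
	"fmt"
	"time"
)

var (
	errStopped                 = errors.New("server stopped")
	errTimeoutWaitAppliedIndex = errors.New("timed out waiting for applied index")
)

const applyTimeout = time.Second // mirrors the new constant in the diff

// waitApplied blocks until ready closes (the applied index caught up),
// the server starts stopping, or applyTimeout elapses.
func waitApplied(ready, stopping <-chan struct{}) error {
	select {
	case <-ready:
		return nil
	case <-stopping:
		return errStopped
	case <-time.After(applyTimeout):
		return errTimeoutWaitAppliedIndex
	}
}

func main() {
	ready := make(chan struct{})
	go func() {
		time.Sleep(100 * time.Millisecond)
		close(ready) // simulate the apply loop catching up
	}()
	fmt.Println(waitApplied(ready, make(chan struct{}))) // <nil>
}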
@@ -280,6 +296,11 @@ func (s *EtcdServer) LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
 }

 func (s *EtcdServer) LeaseRenew(ctx context.Context, id lease.LeaseID) (int64, error) {
+	if s.isLeader() {
+		if err := s.waitAppliedIndex(); err != nil {
+			return 0, err
+		}
+
 	ttl, err := s.lessor.Renew(id)
 	if err == nil { // already requested to primary lessor(leader)
 		return ttl, nil
@@ -287,19 +308,20 @@ func (s *EtcdServer) LeaseRenew(ctx context.Context, id lease.LeaseID) (int64, error) {
 	if err != lease.ErrNotPrimary {
 		return -1, err
 	}
+	}

 	cctx, cancel := context.WithTimeout(ctx, s.Cfg.ReqTimeout())
 	defer cancel()

 	// renewals don't go through raft; forward to leader manually
-	for cctx.Err() == nil && err != nil {
+	for cctx.Err() == nil {
 		leader, lerr := s.waitLeader(cctx)
 		if lerr != nil {
 			return -1, lerr
 		}
 		for _, url := range leader.PeerURLs {
 			lurl := url + leasehttp.LeasePrefix
-			ttl, err = leasehttp.RenewHTTP(cctx, id, lurl, s.peerRt)
+			ttl, err := leasehttp.RenewHTTP(cctx, id, lurl, s.peerRt)
 			if err == nil || err == lease.ErrLeaseNotFound {
 				return ttl, err
 			}
@@ -315,7 +337,10 @@ func (s *EtcdServer) LeaseRenew(ctx context.Context, id lease.LeaseID) (int64, error) {
 }

 func (s *EtcdServer) LeaseTimeToLive(ctx context.Context, r *pb.LeaseTimeToLiveRequest) (*pb.LeaseTimeToLiveResponse, error) {
-	if s.Leader() == s.ID() {
+	if s.isLeader() {
+		if err := s.waitAppliedIndex(); err != nil {
+			return nil, err
+		}
 		// primary; timetolive directly from leader
 		le := s.lessor.Lookup(lease.LeaseID(r.ID))
 		if le == nil {
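Taken together, LeaseRenew and LeaseTimeToLive now serve from the local lessor only when the node is leader and its applied index has caught up; otherwise they fall through to the existing forward-to-leader path. From the client side, the scenario this guards against is an immediate renewal after a grant. A hedged clientv3 usage sketch (the endpoint and TTL values are placeholders, not part of this PR):

package main

import (
	"context"
	"fmt"
	"time"

	clientv3 "go.etcd.io/etcd/client/v3"
)

func main() {
	cli, err := clientv3.New(clientv3.Config{
		Endpoints:   []string{"127.0.0.1:2379"}, // placeholder endpoint
		DialTimeout: 5 * time.Second,
	})
	if err != nil {
		panic(err)
	}
	defer cli.Close()

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	grant, err := cli.Grant(ctx, 10) // 10-second lease
	if err != nil {
		panic(err)
	}
	// Renew immediately; before this fix, a leader whose apply loop lagged
	// could report "lease not found" here.
	ka, err := cli.KeepAliveOnce(ctx, grant.ID)
	if err != nil {
		panic(err)
	}
	fmt.Println("renewed, TTL:", ka.TTL)
}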
@@ -1454,7 +1454,7 @@ func (c *ClusterV3) Client(i int) *clientv3.Client {

 func (c *ClusterV3) ClusterClient() (client *clientv3.Client, err error) {
 	if c.clusterClient == nil {
-		endpoints := []string{}
+		var endpoints []string
 		for _, m := range c.Members {
 			endpoints = append(endpoints, m.grpcURL)
 		}
@@ -16,6 +16,7 @@ package integration

 import (
 	"context"
+	"errors"
 	"fmt"
 	"math"
 	"testing"
@@ -498,17 +499,31 @@ func TestV3LeaseLeases(t *testing.T) {
 // it was oberserved that the immediate lease renewal after granting a lease from follower resulted lease not found.
 // related issue https://github.com/etcd-io/etcd/issues/6978
 func TestV3LeaseRenewStress(t *testing.T) {
-	testLeaseStress(t, stressLeaseRenew)
+	testLeaseStress(t, stressLeaseRenew, false)
+}
+
+// TestV3LeaseRenewStressWithClusterClient is similar to TestV3LeaseRenewStress,
+// but it uses a cluster client instead of a specific member's client.
+// The related issue is https://github.com/etcd-io/etcd/issues/13675.
+func TestV3LeaseRenewStressWithClusterClient(t *testing.T) {
+	testLeaseStress(t, stressLeaseRenew, true)
 }

 // TestV3LeaseTimeToLiveStress keeps creating lease and retrieving it immediately to ensure the lease can be retrieved.
 // it was oberserved that the immediate lease retrieval after granting a lease from follower resulted lease not found.
 // related issue https://github.com/etcd-io/etcd/issues/6978
 func TestV3LeaseTimeToLiveStress(t *testing.T) {
-	testLeaseStress(t, stressLeaseTimeToLive)
+	testLeaseStress(t, stressLeaseTimeToLive, false)
 }

-func testLeaseStress(t *testing.T, stresser func(context.Context, pb.LeaseClient) error) {
+// TestV3LeaseTimeToLiveStressWithClusterClient is similar to TestV3LeaseTimeToLiveStress,
+// but it uses a cluster client instead of a specific member's client.
+// The related issue is https://github.com/etcd-io/etcd/issues/13675.
+func TestV3LeaseTimeToLiveStressWithClusterClient(t *testing.T) {
+	testLeaseStress(t, stressLeaseTimeToLive, true)
+}
+
+func testLeaseStress(t *testing.T, stresser func(context.Context, pb.LeaseClient) error, useClusterClient bool) {
 	BeforeTest(t)
 	clus := NewClusterV3(t, &ClusterConfig{Size: 3})
 	defer clus.Terminate(t)
@@ -517,13 +532,23 @@ func testLeaseStress(t *testing.T, stresser func(context.Context, pb.LeaseClient) error) {
 	defer cancel()
 	errc := make(chan error)

-	for i := 0; i < 30; i++ {
+	if useClusterClient {
+		for i := 0; i < 300; i++ {
+			clusterClient, err := clus.ClusterClient()
+			if err != nil {
+				t.Fatal(err)
+			}
+			go func(i int) { errc <- stresser(ctx, toGRPC(clusterClient).Lease) }(i)
+		}
+	} else {
+		for i := 0; i < 100; i++ {
 		for j := 0; j < 3; j++ {
 			go func(i int) { errc <- stresser(ctx, toGRPC(clus.Client(i)).Lease) }(j)
 		}
 	}
+	}

-	for i := 0; i < 90; i++ {
+	for i := 0; i < 300; i++ {
 		if err := <-errc; err != nil {
 			t.Fatal(err)
 		}
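The loop-count changes are coupled: both branches now launch 300 stresser goroutines in total (300 cluster clients, or 100 iterations across 3 member clients), and each goroutine sends exactly one result on errc, so the drain loop grows from 90 to 300 to match. A minimal sketch of this fan-out/drain accounting:

package main

import "fmt"

func main() {
	const n = 300 // must equal the number of goroutines launched
	errc := make(chan error)
	for i := 0; i < n; i++ {
		go func(i int) { errc <- nil }(i) // each worker reports exactly once
	}
	for i := 0; i < n; i++ { // drain exactly n results, or goroutines leak
		if err := <-errc; err != nil {
			fmt.Println("worker failed:", err)
		}
	}
	fmt.Println("all", n, "workers done")
}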
@@ -554,7 +579,7 @@ func stressLeaseRenew(tctx context.Context, lc pb.LeaseClient) (reterr error) {
 			continue
 		}
 		if rresp.TTL == 0 {
-			return fmt.Errorf("TTL shouldn't be 0 so soon")
+			return errors.New("TTL shouldn't be 0 so soon")
 		}
 	}
 	return nil