Mirror of https://github.com/etcd-io/etcd.git, synced 2024-09-27 06:25:44 +00:00

An empty apply indicates the first commit in the current term. That is the first point at which a follower can be sure its ReadIndex request can be processed.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package etcdserver

import (
	"bytes"
	"context"
	"encoding/base64"
	"encoding/binary"
	"strconv"
	"time"

	pb "go.etcd.io/etcd/api/v3/etcdserverpb"
	"go.etcd.io/etcd/api/v3/membershippb"
	"go.etcd.io/etcd/pkg/v3/traceutil"
	"go.etcd.io/etcd/raft/v3"
	"go.etcd.io/etcd/server/v3/auth"
	"go.etcd.io/etcd/server/v3/etcdserver/api/membership"
	"go.etcd.io/etcd/server/v3/lease"
	"go.etcd.io/etcd/server/v3/lease/leasehttp"
	"go.etcd.io/etcd/server/v3/mvcc"

	"github.com/gogo/protobuf/proto"
	"go.uber.org/zap"
	"golang.org/x/crypto/bcrypt"
)

const (
	// In the healthy case, there might be a small gap (tens of entries)
	// between the applied index and the committed index. However, if the
	// committed entries are very heavy to apply, the gap can grow. We should
	// stop accepting new proposals if the gap grows beyond a certain point.
	maxGapBetweenApplyAndCommitIndex = 5000
	traceThreshold                   = 100 * time.Millisecond
	readIndexRetryTime               = 500 * time.Millisecond
)
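
// RaftKV is the key-value interface of the etcd server: Range, Put,
// DeleteRange, Txn, and Compact.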
type RaftKV interface {
	Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error)
	Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error)
	DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error)
	Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error)
	Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error)
}

type Lessor interface {
	// LeaseGrant sends a LeaseGrant request through raft and applies it once committed.
	LeaseGrant(ctx context.Context, r *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error)
	// LeaseRevoke sends a LeaseRevoke request through raft and applies it once committed.
	LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error)

	// LeaseRenew renews the lease with the given ID. It returns the renewed TTL, or an error.
	LeaseRenew(ctx context.Context, id lease.LeaseID) (int64, error)

	// LeaseTimeToLive retrieves lease information.
	LeaseTimeToLive(ctx context.Context, r *pb.LeaseTimeToLiveRequest) (*pb.LeaseTimeToLiveResponse, error)

	// LeaseLeases lists all leases.
	LeaseLeases(ctx context.Context, r *pb.LeaseLeasesRequest) (*pb.LeaseLeasesResponse, error)
}

type Authenticator interface {
	AuthEnable(ctx context.Context, r *pb.AuthEnableRequest) (*pb.AuthEnableResponse, error)
	AuthDisable(ctx context.Context, r *pb.AuthDisableRequest) (*pb.AuthDisableResponse, error)
	AuthStatus(ctx context.Context, r *pb.AuthStatusRequest) (*pb.AuthStatusResponse, error)
	Authenticate(ctx context.Context, r *pb.AuthenticateRequest) (*pb.AuthenticateResponse, error)
	UserAdd(ctx context.Context, r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error)
	UserDelete(ctx context.Context, r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error)
	UserChangePassword(ctx context.Context, r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error)
	UserGrantRole(ctx context.Context, r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error)
	UserGet(ctx context.Context, r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error)
	UserRevokeRole(ctx context.Context, r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error)
	RoleAdd(ctx context.Context, r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error)
	RoleGrantPermission(ctx context.Context, r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error)
	RoleGet(ctx context.Context, r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error)
	RoleRevokePermission(ctx context.Context, r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error)
	RoleDelete(ctx context.Context, r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error)
	UserList(ctx context.Context, r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error)
	RoleList(ctx context.Context, r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error)
}

func (s *EtcdServer) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error) {
	trace := traceutil.New("range",
		s.Logger(),
		traceutil.Field{Key: "range_begin", Value: string(r.Key)},
		traceutil.Field{Key: "range_end", Value: string(r.RangeEnd)},
	)
	ctx = context.WithValue(ctx, traceutil.TraceKey, trace)

	var resp *pb.RangeResponse
	var err error
	defer func(start time.Time) {
		warnOfExpensiveReadOnlyRangeRequest(s.Logger(), s.Cfg.WarningApplyDuration, start, r, resp, err)
		if resp != nil {
			trace.AddField(
				traceutil.Field{Key: "response_count", Value: len(resp.Kvs)},
				traceutil.Field{Key: "response_revision", Value: resp.Header.Revision},
			)
		}
		trace.LogIfLong(traceThreshold)
	}(time.Now())

	if !r.Serializable {
		err = s.linearizableReadNotify(ctx)
		trace.Step("agreement among raft nodes before linearized reading")
		if err != nil {
			return nil, err
		}
	}
	chk := func(ai *auth.AuthInfo) error {
		return s.authStore.IsRangePermitted(ai, r.Key, r.RangeEnd)
	}

	get := func() { resp, err = s.applyV3Base.Range(ctx, nil, r) }
	if serr := s.doSerialize(ctx, chk, get); serr != nil {
		err = serr
		return nil, err
	}
	return resp, err
}
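
// A Range is served in one of two modes: a linearizable read (the default)
// first waits for consensus on a read index via linearizableReadNotify, while
// a serializable read skips that step and serves from the local store,
// trading freshness for latency. Illustrative request shapes (hypothetical
// values):
//
//	&pb.RangeRequest{Key: []byte("foo")}                     // linearizable
//	&pb.RangeRequest{Key: []byte("foo"), Serializable: true} // serializable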

func (s *EtcdServer) Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error) {
	ctx = context.WithValue(ctx, traceutil.StartTimeKey, time.Now())
	resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{Put: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.PutResponse), nil
}

func (s *EtcdServer) DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
	resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{DeleteRange: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.DeleteRangeResponse), nil
}

func (s *EtcdServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error) {
	if isTxnReadonly(r) {
		trace := traceutil.New("transaction",
			s.Logger(),
			traceutil.Field{Key: "read_only", Value: true},
		)
		ctx = context.WithValue(ctx, traceutil.TraceKey, trace)
		if !isTxnSerializable(r) {
			err := s.linearizableReadNotify(ctx)
			trace.Step("agreement among raft nodes before linearized reading")
			if err != nil {
				return nil, err
			}
		}
		var resp *pb.TxnResponse
		var err error
		chk := func(ai *auth.AuthInfo) error {
			return checkTxnAuth(s.authStore, ai, r)
		}

		defer func(start time.Time) {
			warnOfExpensiveReadOnlyTxnRequest(s.Logger(), s.Cfg.WarningApplyDuration, start, r, resp, err)
			trace.LogIfLong(traceThreshold)
		}(time.Now())

		get := func() { resp, _, err = s.applyV3Base.Txn(ctx, r) }
		if serr := s.doSerialize(ctx, chk, get); serr != nil {
			return nil, serr
		}
		return resp, err
	}

	ctx = context.WithValue(ctx, traceutil.StartTimeKey, time.Now())
	resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{Txn: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.TxnResponse), nil
}

func isTxnSerializable(r *pb.TxnRequest) bool {
	for _, u := range r.Success {
		if r := u.GetRequestRange(); r == nil || !r.Serializable {
			return false
		}
	}
	for _, u := range r.Failure {
		if r := u.GetRequestRange(); r == nil || !r.Serializable {
			return false
		}
	}
	return true
}

func isTxnReadonly(r *pb.TxnRequest) bool {
	for _, u := range r.Success {
		if r := u.GetRequestRange(); r == nil {
			return false
		}
	}
	for _, u := range r.Failure {
		if r := u.GetRequestRange(); r == nil {
			return false
		}
	}
	return true
}
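
// For example (illustrative value): a Txn whose Success and Failure branches
// contain only range ops is read-only and takes the fast path in Txn above;
// it is additionally serializable only if every such range sets Serializable:
//
//	&pb.TxnRequest{Success: []*pb.RequestOp{{
//		Request: &pb.RequestOp_RequestRange{
//			RequestRange: &pb.RangeRequest{Key: []byte("k"), Serializable: true},
//		},
//	}}}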

func (s *EtcdServer) Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error) {
	startTime := time.Now()
	result, err := s.processInternalRaftRequestOnce(ctx, pb.InternalRaftRequest{Compaction: r})
	trace := traceutil.TODO()
	if result != nil && result.trace != nil {
		trace = result.trace
		defer func() {
			trace.LogIfLong(traceThreshold)
		}()
		applyStart := result.trace.GetStartTime()
		result.trace.SetStartTime(startTime)
		trace.InsertStep(0, applyStart, "process raft request")
	}
	if r.Physical && result != nil && result.physc != nil {
		<-result.physc
		// The compaction is done deleting keys; the hash is now settled
		// but the data is not necessarily committed. If there's a crash,
		// the hash may revert to a hash prior to compaction completing
		// if the compaction resumes. Force the finished compaction to
		// commit so it won't resume following a crash.
		s.be.ForceCommit()
		trace.Step("physically apply compaction")
	}
	if err != nil {
		return nil, err
	}
	if result.err != nil {
		return nil, result.err
	}
	resp := result.resp.(*pb.CompactionResponse)
	if resp == nil {
		resp = &pb.CompactionResponse{}
	}
	if resp.Header == nil {
		resp.Header = &pb.ResponseHeader{}
	}
	resp.Header.Revision = s.kv.Rev()
	trace.AddField(traceutil.Field{Key: "response_revision", Value: resp.Header.Revision})
	return resp, nil
}

func (s *EtcdServer) LeaseGrant(ctx context.Context, r *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
	// no ID given? choose one
	for r.ID == int64(lease.NoLease) {
		// only use positive int64 IDs
		r.ID = int64(s.reqIDGen.Next() & ((1 << 63) - 1))
	}
	resp, err := s.raftRequestOnce(ctx, pb.InternalRaftRequest{LeaseGrant: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.LeaseGrantResponse), nil
}

func (s *EtcdServer) LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
	resp, err := s.raftRequestOnce(ctx, pb.InternalRaftRequest{LeaseRevoke: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.LeaseRevokeResponse), nil
}

func (s *EtcdServer) LeaseRenew(ctx context.Context, id lease.LeaseID) (int64, error) {
	ttl, err := s.lessor.Renew(id)
	if err == nil { // this node is the primary lessor (leader); renewed locally
		return ttl, nil
	}
	if err != lease.ErrNotPrimary {
		return -1, err
	}

	cctx, cancel := context.WithTimeout(ctx, s.Cfg.ReqTimeout())
	defer cancel()

	// renewals don't go through raft; forward to leader manually
	for cctx.Err() == nil && err != nil {
		leader, lerr := s.waitLeader(cctx)
		if lerr != nil {
			return -1, lerr
		}
		for _, url := range leader.PeerURLs {
			lurl := url + leasehttp.LeasePrefix
			ttl, err = leasehttp.RenewHTTP(cctx, id, lurl, s.peerRt)
			if err == nil || err == lease.ErrLeaseNotFound {
				return ttl, err
			}
		}
		// Throttle in case of e.g. connection problems.
		time.Sleep(50 * time.Millisecond)
	}

	if cctx.Err() == context.DeadlineExceeded {
		return -1, ErrTimeout
	}
	return -1, ErrCanceled
}
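
// Lease renewals deliberately bypass raft: a follower forwards the keepalive
// over peer HTTP (leasehttp.RenewHTTP) to the leader, which owns the lease
// TTL state, rather than paying a consensus round for every heartbeat.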

func (s *EtcdServer) LeaseTimeToLive(ctx context.Context, r *pb.LeaseTimeToLiveRequest) (*pb.LeaseTimeToLiveResponse, error) {
	if s.Leader() == s.ID() {
		// primary; timetolive directly from leader
		le := s.lessor.Lookup(lease.LeaseID(r.ID))
		if le == nil {
			return nil, lease.ErrLeaseNotFound
		}
		// TODO: fill out ResponseHeader
		resp := &pb.LeaseTimeToLiveResponse{Header: &pb.ResponseHeader{}, ID: r.ID, TTL: int64(le.Remaining().Seconds()), GrantedTTL: le.TTL()}
		if r.Keys {
			ks := le.Keys()
			kbs := make([][]byte, len(ks))
			for i := range ks {
				kbs[i] = []byte(ks[i])
			}
			resp.Keys = kbs
		}
		return resp, nil
	}

	cctx, cancel := context.WithTimeout(ctx, s.Cfg.ReqTimeout())
	defer cancel()

	// forward to leader
	for cctx.Err() == nil {
		leader, err := s.waitLeader(cctx)
		if err != nil {
			return nil, err
		}
		for _, url := range leader.PeerURLs {
			lurl := url + leasehttp.LeaseInternalPrefix
			resp, err := leasehttp.TimeToLiveHTTP(cctx, lease.LeaseID(r.ID), r.Keys, lurl, s.peerRt)
			if err == nil {
				return resp.LeaseTimeToLiveResponse, nil
			}
			if err == lease.ErrLeaseNotFound {
				return nil, err
			}
		}
	}

	if cctx.Err() == context.DeadlineExceeded {
		return nil, ErrTimeout
	}
	return nil, ErrCanceled
}

func (s *EtcdServer) LeaseLeases(ctx context.Context, r *pb.LeaseLeasesRequest) (*pb.LeaseLeasesResponse, error) {
	ls := s.lessor.Leases()
	lss := make([]*pb.LeaseStatus, len(ls))
	for i := range ls {
		lss[i] = &pb.LeaseStatus{ID: int64(ls[i].ID)}
	}
	return &pb.LeaseLeasesResponse{Header: newHeader(s), Leases: lss}, nil
}

func (s *EtcdServer) waitLeader(ctx context.Context) (*membership.Member, error) {
	leader := s.cluster.Member(s.Leader())
	for leader == nil {
		// wait for an election to complete
		dur := time.Duration(s.Cfg.ElectionTicks) * time.Duration(s.Cfg.TickMs) * time.Millisecond
		select {
		case <-time.After(dur):
			leader = s.cluster.Member(s.Leader())
		case <-s.stopping:
			return nil, ErrStopped
		case <-ctx.Done():
			return nil, ErrNoLeader
		}
	}
	if leader == nil || len(leader.PeerURLs) == 0 {
		return nil, ErrNoLeader
	}
	return leader, nil
}

func (s *EtcdServer) Alarm(ctx context.Context, r *pb.AlarmRequest) (*pb.AlarmResponse, error) {
	resp, err := s.raftRequestOnce(ctx, pb.InternalRaftRequest{Alarm: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.AlarmResponse), nil
}

func (s *EtcdServer) AuthEnable(ctx context.Context, r *pb.AuthEnableRequest) (*pb.AuthEnableResponse, error) {
	resp, err := s.raftRequestOnce(ctx, pb.InternalRaftRequest{AuthEnable: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.AuthEnableResponse), nil
}

func (s *EtcdServer) AuthDisable(ctx context.Context, r *pb.AuthDisableRequest) (*pb.AuthDisableResponse, error) {
	resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthDisable: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.AuthDisableResponse), nil
}

func (s *EtcdServer) AuthStatus(ctx context.Context, r *pb.AuthStatusRequest) (*pb.AuthStatusResponse, error) {
	resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthStatus: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.AuthStatusResponse), nil
}

func (s *EtcdServer) Authenticate(ctx context.Context, r *pb.AuthenticateRequest) (*pb.AuthenticateResponse, error) {
	if err := s.linearizableReadNotify(ctx); err != nil {
		return nil, err
	}

	lg := s.Logger()

	var resp proto.Message
	for {
		checkedRevision, err := s.AuthStore().CheckPassword(r.Name, r.Password)
		if err != nil {
			if err != auth.ErrAuthNotEnabled {
				lg.Warn(
					"invalid authentication was requested",
					zap.String("user", r.Name),
					zap.Error(err),
				)
			}
			return nil, err
		}

		st, err := s.AuthStore().GenTokenPrefix()
		if err != nil {
			return nil, err
		}

		// internalReq doesn't need to carry the password because
		// s.AuthStore().CheckPassword() above already verified it. This also
		// keeps the plaintext password out of the WAL entry.
		internalReq := &pb.InternalAuthenticateRequest{
			Name:        r.Name,
			SimpleToken: st,
		}

		resp, err = s.raftRequestOnce(ctx, pb.InternalRaftRequest{Authenticate: internalReq})
		if err != nil {
			return nil, err
		}
		if checkedRevision == s.AuthStore().Revision() {
			break
		}

		lg.Info("revision when password checked became stale; retrying")
	}

	return resp.(*pb.AuthenticateResponse), nil
}
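
// Authenticate retries whenever the auth store revision moves between the
// password check and the raft commit: a concurrent auth change (e.g. a
// password update) could otherwise issue a token against stale credentials.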

func (s *EtcdServer) UserAdd(ctx context.Context, r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error) {
	if r.Options == nil || !r.Options.NoPassword {
		hashedPassword, err := bcrypt.GenerateFromPassword([]byte(r.Password), s.authStore.BcryptCost())
		if err != nil {
			return nil, err
		}
		r.HashedPassword = base64.StdEncoding.EncodeToString(hashedPassword)
		r.Password = ""
	}

	resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserAdd: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.AuthUserAddResponse), nil
}
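
// Passwords are bcrypt-hashed here, before the request enters raft, so the
// WAL and peers only ever see the base64-encoded hash, never the plaintext
// (UserChangePassword below follows the same pattern).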

func (s *EtcdServer) UserDelete(ctx context.Context, r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error) {
	resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserDelete: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.AuthUserDeleteResponse), nil
}

func (s *EtcdServer) UserChangePassword(ctx context.Context, r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error) {
	if r.Password != "" {
		hashedPassword, err := bcrypt.GenerateFromPassword([]byte(r.Password), s.authStore.BcryptCost())
		if err != nil {
			return nil, err
		}
		r.HashedPassword = base64.StdEncoding.EncodeToString(hashedPassword)
		r.Password = ""
	}

	resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserChangePassword: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.AuthUserChangePasswordResponse), nil
}

func (s *EtcdServer) UserGrantRole(ctx context.Context, r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error) {
	resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserGrantRole: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.AuthUserGrantRoleResponse), nil
}

func (s *EtcdServer) UserGet(ctx context.Context, r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error) {
	resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserGet: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.AuthUserGetResponse), nil
}

func (s *EtcdServer) UserList(ctx context.Context, r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error) {
	resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserList: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.AuthUserListResponse), nil
}

func (s *EtcdServer) UserRevokeRole(ctx context.Context, r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error) {
	resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserRevokeRole: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.AuthUserRevokeRoleResponse), nil
}

func (s *EtcdServer) RoleAdd(ctx context.Context, r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error) {
	resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleAdd: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.AuthRoleAddResponse), nil
}

func (s *EtcdServer) RoleGrantPermission(ctx context.Context, r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error) {
	resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleGrantPermission: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.AuthRoleGrantPermissionResponse), nil
}

func (s *EtcdServer) RoleGet(ctx context.Context, r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error) {
	resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleGet: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.AuthRoleGetResponse), nil
}

func (s *EtcdServer) RoleList(ctx context.Context, r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error) {
	resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleList: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.AuthRoleListResponse), nil
}

func (s *EtcdServer) RoleRevokePermission(ctx context.Context, r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error) {
	resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleRevokePermission: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.AuthRoleRevokePermissionResponse), nil
}

func (s *EtcdServer) RoleDelete(ctx context.Context, r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error) {
	resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleDelete: r})
	if err != nil {
		return nil, err
	}
	return resp.(*pb.AuthRoleDeleteResponse), nil
}

func (s *EtcdServer) raftRequestOnce(ctx context.Context, r pb.InternalRaftRequest) (proto.Message, error) {
	result, err := s.processInternalRaftRequestOnce(ctx, r)
	if err != nil {
		return nil, err
	}
	if result.err != nil {
		return nil, result.err
	}
	if startTime, ok := ctx.Value(traceutil.StartTimeKey).(time.Time); ok && result.trace != nil {
		applyStart := result.trace.GetStartTime()
		// The trace object is created in the apply phase. Reset its start time
		// to the request start time so that the first step captures the raft
		// request latency: the gap between the request start and apply start.
		result.trace.SetStartTime(startTime)
		result.trace.InsertStep(0, applyStart, "process raft request")
		result.trace.LogIfLong(traceThreshold)
	}
	return result.resp, nil
}

func (s *EtcdServer) raftRequest(ctx context.Context, r pb.InternalRaftRequest) (proto.Message, error) {
	return s.raftRequestOnce(ctx, r)
}

// doSerialize handles the auth logic, with permissions checked by "chk", for
// a serialized request "get". Returns a non-nil error on authentication
// failure.
func (s *EtcdServer) doSerialize(ctx context.Context, chk func(*auth.AuthInfo) error, get func()) error {
	trace := traceutil.Get(ctx)
	ai, err := s.AuthInfoFromCtx(ctx)
	if err != nil {
		return err
	}
	if ai == nil {
		// chk expects non-nil AuthInfo; use empty credentials
		ai = &auth.AuthInfo{}
	}
	if err = chk(ai); err != nil {
		return err
	}
	trace.Step("get authentication metadata")
	// fetch response for serialized request
	get()
	// check for a stale token revision in case the auth store was updated
	// while the request was being handled.
	if ai.Revision != 0 && ai.Revision != s.authStore.Revision() {
		return auth.ErrAuthOldRevision
	}
	return nil
}

func (s *EtcdServer) processInternalRaftRequestOnce(ctx context.Context, r pb.InternalRaftRequest) (*applyResult, error) {
	ai := s.getAppliedIndex()
	ci := s.getCommittedIndex()
	if ci > ai+maxGapBetweenApplyAndCommitIndex {
		return nil, ErrTooManyRequests
	}

	r.Header = &pb.RequestHeader{
		ID: s.reqIDGen.Next(),
	}

	// attach auth info unless this is an InternalAuthenticateRequest
	if r.Authenticate == nil {
		authInfo, err := s.AuthInfoFromCtx(ctx)
		if err != nil {
			return nil, err
		}
		if authInfo != nil {
			r.Header.Username = authInfo.Username
			r.Header.AuthRevision = authInfo.Revision
		}
	}

	data, err := r.Marshal()
	if err != nil {
		return nil, err
	}

	if len(data) > int(s.Cfg.MaxRequestBytes) {
		return nil, ErrRequestTooLarge
	}

	id := r.ID
	if id == 0 {
		id = r.Header.ID
	}
	ch := s.w.Register(id)

	cctx, cancel := context.WithTimeout(ctx, s.Cfg.ReqTimeout())
	defer cancel()

	start := time.Now()
	err = s.r.Propose(cctx, data)
	if err != nil {
		proposalsFailed.Inc()
		s.w.Trigger(id, nil) // GC wait
		return nil, err
	}
	proposalsPending.Inc()
	defer proposalsPending.Dec()

	select {
	case x := <-ch:
		return x.(*applyResult), nil
	case <-cctx.Done():
		proposalsFailed.Inc()
		s.w.Trigger(id, nil) // GC wait
		return nil, s.parseProposeCtxErr(cctx.Err(), start)
	case <-s.done:
		return nil, ErrStopped
	}
}
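
// The proposal lifecycle above: reject early if apply lags commit by more
// than maxGapBetweenApplyAndCommitIndex, stamp a unique request ID, marshal
// and size-check the request, register a wait channel keyed by that ID,
// propose through raft, then block until the apply loop triggers the channel
// with an *applyResult (or the request times out, or the server stops).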

// Watchable returns a watchable interface attached to the etcdserver.
func (s *EtcdServer) Watchable() mvcc.WatchableKV { return s.KV() }

func (s *EtcdServer) linearizableReadLoop() {
	for {
		requestId := s.reqIDGen.Next()
		leaderChangedNotifier := s.LeaderChangedNotify()
		select {
		case <-leaderChangedNotifier:
			continue
		case <-s.readwaitc:
		case <-s.stopping:
			return
		}

		// as a single loop can unlock multiple reads, it is not very useful
		// to propagate the trace from Txn or Range.
		trace := traceutil.New("linearizableReadLoop", s.Logger())

		nextnr := newNotifier()
		s.readMu.Lock()
		nr := s.readNotifier
		s.readNotifier = nextnr
		s.readMu.Unlock()

		confirmedIndex, err := s.requestCurrentIndex(leaderChangedNotifier, requestId)
		if isStopped(err) {
			return
		}
		if err != nil {
			nr.notify(err)
			continue
		}

		trace.Step("read index received")

		trace.AddField(traceutil.Field{Key: "readStateIndex", Value: confirmedIndex})

		appliedIndex := s.getAppliedIndex()
		trace.AddField(traceutil.Field{Key: "appliedIndex", Value: strconv.FormatUint(appliedIndex, 10)})

		if appliedIndex < confirmedIndex {
			select {
			case <-s.applyWait.Wait(confirmedIndex):
			case <-s.stopping:
				return
			}
		}
		// unblock all l-reads requested at indices before confirmedIndex
		nr.notify(nil)
		trace.Step("applied index is now lower than readState.Index")

		trace.LogAllStepsIfLong(traceThreshold)
	}
}
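
// Wake-ups on readwaitc are coalesced: every linearizableReadNotify caller
// that arrived while one ReadIndex round was in flight shares the same
// notifier, so a single confirmed index unblocks all of them at once.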

func isStopped(err error) bool {
	return err == raft.ErrStopped || err == ErrStopped
}

func (s *EtcdServer) requestCurrentIndex(leaderChangedNotifier <-chan struct{}, requestId uint64) (uint64, error) {
	err := s.sendReadIndex(requestId)
	if err != nil {
		return 0, err
	}

	lg := s.Logger()
	errorTimer := time.NewTimer(s.Cfg.ReqTimeout())
	defer errorTimer.Stop()
	retryTimer := time.NewTimer(readIndexRetryTime)
	defer retryTimer.Stop()

	firstCommitInTermNotifier := s.FirstCommitInTermNotify()

	for {
		select {
		case rs := <-s.r.readStateC:
			requestIdBytes := uint64ToBigEndianBytes(requestId)
			gotOwnResponse := bytes.Equal(rs.RequestCtx, requestIdBytes)
			if !gotOwnResponse {
				// A previous request might have timed out; ignore its stale
				// response and keep waiting for the response to the current
				// request.
				responseId := uint64(0)
				if len(rs.RequestCtx) == 8 {
					responseId = binary.BigEndian.Uint64(rs.RequestCtx)
				}
				lg.Warn(
					"ignored out-of-date read index response; local node read indexes queueing up and waiting to be in sync with leader",
					zap.Uint64("sent-request-id", requestId),
					zap.Uint64("received-request-id", responseId),
				)
				slowReadIndex.Inc()
				continue
			}
			return rs.Index, nil
		case <-leaderChangedNotifier:
			readIndexFailed.Inc()
			// return a retryable error.
			return 0, ErrLeaderChanged
		case <-firstCommitInTermNotifier:
			firstCommitInTermNotifier = s.FirstCommitInTermNotify()
			lg.Info("first commit in current term: resending ReadIndex request")
			err := s.sendReadIndex(requestId)
			if err != nil {
				return 0, err
			}
			retryTimer.Reset(readIndexRetryTime)
			continue
		case <-retryTimer.C:
			lg.Warn(
				"waiting for ReadIndex response took too long, retrying",
				zap.Uint64("sent-request-id", requestId),
				zap.Duration("retry-timeout", readIndexRetryTime),
			)
			err := s.sendReadIndex(requestId)
			if err != nil {
				return 0, err
			}
			retryTimer.Reset(readIndexRetryTime)
			continue
		case <-errorTimer.C:
			lg.Warn(
				"timed out waiting for read index response (local node might have slow network)",
				zap.Duration("timeout", s.Cfg.ReqTimeout()),
			)
			slowReadIndex.Inc()
			return 0, ErrTimeout
		case <-s.stopping:
			return 0, ErrStopped
		}
	}
}
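
// The firstCommitInTermNotifier case matters after a leader election: a new
// leader cannot serve ReadIndex until it has committed an entry (the empty
// apply) in its own term, so the request is resent as soon as that first
// commit is observed.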

func uint64ToBigEndianBytes(number uint64) []byte {
	byteResult := make([]byte, 8)
	binary.BigEndian.PutUint64(byteResult, number)
	return byteResult
}
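
// Illustrative encoding (hypothetical value): request ID 42 becomes
// []byte{0, 0, 0, 0, 0, 0, 0, 42}; requestCurrentIndex matches a ReadState
// to its request by comparing rs.RequestCtx against these 8 bytes.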

func (s *EtcdServer) sendReadIndex(requestIndex uint64) error {
	ctxToSend := uint64ToBigEndianBytes(requestIndex)

	cctx, cancel := context.WithTimeout(context.Background(), s.Cfg.ReqTimeout())
	err := s.r.ReadIndex(cctx, ctxToSend)
	cancel()
	if err == raft.ErrStopped {
		return err
	}
	if err != nil {
		lg := s.Logger()
		lg.Warn("failed to get read index from Raft", zap.Error(err))
		readIndexFailed.Inc()
		return err
	}
	return nil
}

func (s *EtcdServer) LinearizableReadNotify(ctx context.Context) error {
	return s.linearizableReadNotify(ctx)
}

func (s *EtcdServer) linearizableReadNotify(ctx context.Context) error {
	s.readMu.RLock()
	nc := s.readNotifier
	s.readMu.RUnlock()

	// signal linearizable loop for current notify if it hasn't been already
	select {
	case s.readwaitc <- struct{}{}:
	default:
	}

	// wait for read state notification
	select {
	case <-nc.c:
		return nc.err
	case <-ctx.Done():
		return ctx.Err()
	case <-s.done:
		return ErrStopped
	}
}

func (s *EtcdServer) AuthInfoFromCtx(ctx context.Context) (*auth.AuthInfo, error) {
	authInfo, err := s.AuthStore().AuthInfoFromCtx(ctx)
	if authInfo != nil || err != nil {
		return authInfo, err
	}
	if !s.Cfg.ClientCertAuthEnabled {
		return nil, nil
	}
	authInfo = s.AuthStore().AuthInfoFromTLS(ctx)
	return authInfo, nil
}
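
// Auth metadata is resolved from two sources in order: token credentials in
// the request context, then (when client cert auth is enabled) the TLS
// client certificate; a nil, nil return means an unauthenticated request.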

func (s *EtcdServer) Downgrade(ctx context.Context, r *pb.DowngradeRequest) (*pb.DowngradeResponse, error) {
	switch r.Action {
	case pb.DowngradeRequest_VALIDATE:
		return s.downgradeValidate(ctx, r.Version)
	case pb.DowngradeRequest_ENABLE:
		return s.downgradeEnable(ctx, r)
	case pb.DowngradeRequest_CANCEL:
		return s.downgradeCancel(ctx)
	default:
		return nil, ErrUnknownMethod
	}
}

func (s *EtcdServer) downgradeValidate(ctx context.Context, v string) (*pb.DowngradeResponse, error) {
	resp := &pb.DowngradeResponse{}

	targetVersion, err := convertToClusterVersion(v)
	if err != nil {
		return nil, err
	}

	// get the leader's commit index and wait for the local store to finish
	// applying that index, to avoid acting on stale downgrade information
	err = s.linearizableReadNotify(ctx)
	if err != nil {
		return nil, err
	}

	cv := s.ClusterVersion()
	if cv == nil {
		return nil, ErrClusterVersionUnavailable
	}
	resp.Version = cv.String()

	allowedTargetVersion := membership.AllowedDowngradeVersion(cv)
	if !targetVersion.Equal(*allowedTargetVersion) {
		return nil, ErrInvalidDowngradeTargetVersion
	}

	downgradeInfo := s.cluster.DowngradeInfo()
	if downgradeInfo.Enabled {
		// TODO: return the downgrade status along with the error msg
		return nil, ErrDowngradeInProcess
	}
	return resp, nil
}

func (s *EtcdServer) downgradeEnable(ctx context.Context, r *pb.DowngradeRequest) (*pb.DowngradeResponse, error) {
	// validate downgrade capability before starting downgrade
	v := r.Version
	lg := s.Logger()
	if resp, err := s.downgradeValidate(ctx, v); err != nil {
		lg.Warn("reject downgrade request", zap.Error(err))
		return resp, err
	}
	targetVersion, err := convertToClusterVersion(v)
	if err != nil {
		lg.Warn("reject downgrade request", zap.Error(err))
		return nil, err
	}

	raftRequest := membershippb.DowngradeInfoSetRequest{Enabled: true, Ver: targetVersion.String()}
	_, err = s.raftRequest(ctx, pb.InternalRaftRequest{DowngradeInfoSet: &raftRequest})
	if err != nil {
		lg.Warn("reject downgrade request", zap.Error(err))
		return nil, err
	}
	resp := pb.DowngradeResponse{Version: s.ClusterVersion().String()}
	return &resp, nil
}

func (s *EtcdServer) downgradeCancel(ctx context.Context) (*pb.DowngradeResponse, error) {
	// get the leader's commit index and wait for the local store to finish
	// applying that index, to avoid acting on stale downgrade information
	if err := s.linearizableReadNotify(ctx); err != nil {
		return nil, err
	}

	downgradeInfo := s.cluster.DowngradeInfo()
	if !downgradeInfo.Enabled {
		return nil, ErrNoInflightDowngrade
	}

	raftRequest := membershippb.DowngradeInfoSetRequest{Enabled: false}
	_, err := s.raftRequest(ctx, pb.InternalRaftRequest{DowngradeInfoSet: &raftRequest})
	if err != nil {
		return nil, err
	}
	resp := pb.DowngradeResponse{Version: s.ClusterVersion().String()}
	return &resp, nil
}