mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
Encapsulation of applier logic: Move Txn related code out of applier.go.
The PR removes calls to applierV3base logic from server.go that is NOT part of 'application'. The original idea was that read-only transaction and Range call shared logic with Apply, so they can call appliers directly (but bypassing all 'corrupt', 'quota' and 'auth' wrappers). This PR moves all the logic to a separate file (that later can become package on its own).
This commit is contained in:
parent
cc5d6f7dc3
commit
cdf9869d70
@ -15,17 +15,14 @@
|
|||||||
package etcdserver
|
package etcdserver
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"sort"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/coreos/go-semver/semver"
|
"github.com/coreos/go-semver/semver"
|
||||||
pb "go.etcd.io/etcd/api/v3/etcdserverpb"
|
pb "go.etcd.io/etcd/api/v3/etcdserverpb"
|
||||||
"go.etcd.io/etcd/api/v3/membershippb"
|
"go.etcd.io/etcd/api/v3/membershippb"
|
||||||
"go.etcd.io/etcd/api/v3/mvccpb"
|
|
||||||
"go.etcd.io/etcd/client/pkg/v3/types"
|
"go.etcd.io/etcd/client/pkg/v3/types"
|
||||||
"go.etcd.io/etcd/pkg/v3/traceutil"
|
"go.etcd.io/etcd/pkg/v3/traceutil"
|
||||||
"go.etcd.io/etcd/server/v3/auth"
|
"go.etcd.io/etcd/server/v3/auth"
|
||||||
@ -99,24 +96,12 @@ type applierV3 interface {
|
|||||||
RoleList(ua *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error)
|
RoleList(ua *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
type checkReqFunc func(mvcc.ReadView, *pb.RequestOp) error
|
|
||||||
|
|
||||||
type applierV3backend struct {
|
type applierV3backend struct {
|
||||||
s *EtcdServer
|
s *EtcdServer
|
||||||
|
|
||||||
checkPut checkReqFunc
|
|
||||||
checkRange checkReqFunc
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EtcdServer) newApplierV3Backend() applierV3 {
|
func (s *EtcdServer) newApplierV3Backend() applierV3 {
|
||||||
base := &applierV3backend{s: s}
|
return &applierV3backend{s: s}
|
||||||
base.checkPut = func(rv mvcc.ReadView, req *pb.RequestOp) error {
|
|
||||||
return base.checkRequestPut(rv, req)
|
|
||||||
}
|
|
||||||
base.checkRange = func(rv mvcc.ReadView, req *pb.RequestOp) error {
|
|
||||||
return base.checkRequestRange(rv, req)
|
|
||||||
}
|
|
||||||
return base
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EtcdServer) newApplierV3Internal() applierV3Internal {
|
func (s *EtcdServer) newApplierV3Internal() applierV3Internal {
|
||||||
@ -249,434 +234,19 @@ func (a *applierV3backend) Apply(r *pb.InternalRaftRequest, shouldApplyV3 member
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (a *applierV3backend) Put(ctx context.Context, txn mvcc.TxnWrite, p *pb.PutRequest) (resp *pb.PutResponse, trace *traceutil.Trace, err error) {
|
func (a *applierV3backend) Put(ctx context.Context, txn mvcc.TxnWrite, p *pb.PutRequest) (resp *pb.PutResponse, trace *traceutil.Trace, err error) {
|
||||||
resp = &pb.PutResponse{}
|
return Put(ctx, a.s.Logger(), a.s.lessor, a.s.KV(), txn, p)
|
||||||
resp.Header = &pb.ResponseHeader{}
|
|
||||||
trace = traceutil.Get(ctx)
|
|
||||||
// create put tracing if the trace in context is empty
|
|
||||||
if trace.IsEmpty() {
|
|
||||||
trace = traceutil.New("put",
|
|
||||||
a.s.Logger(),
|
|
||||||
traceutil.Field{Key: "key", Value: string(p.Key)},
|
|
||||||
traceutil.Field{Key: "req_size", Value: p.Size()},
|
|
||||||
)
|
|
||||||
}
|
|
||||||
val, leaseID := p.Value, lease.LeaseID(p.Lease)
|
|
||||||
if txn == nil {
|
|
||||||
if leaseID != lease.NoLease {
|
|
||||||
if l := a.s.lessor.Lookup(leaseID); l == nil {
|
|
||||||
return nil, nil, lease.ErrLeaseNotFound
|
|
||||||
}
|
|
||||||
}
|
|
||||||
txn = a.s.KV().Write(trace)
|
|
||||||
defer txn.End()
|
|
||||||
}
|
|
||||||
|
|
||||||
var rr *mvcc.RangeResult
|
|
||||||
if p.IgnoreValue || p.IgnoreLease || p.PrevKv {
|
|
||||||
trace.StepWithFunction(func() {
|
|
||||||
rr, err = txn.Range(context.TODO(), p.Key, nil, mvcc.RangeOptions{})
|
|
||||||
}, "get previous kv pair")
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if p.IgnoreValue || p.IgnoreLease {
|
|
||||||
if rr == nil || len(rr.KVs) == 0 {
|
|
||||||
// ignore_{lease,value} flag expects previous key-value pair
|
|
||||||
return nil, nil, ErrKeyNotFound
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if p.IgnoreValue {
|
|
||||||
val = rr.KVs[0].Value
|
|
||||||
}
|
|
||||||
if p.IgnoreLease {
|
|
||||||
leaseID = lease.LeaseID(rr.KVs[0].Lease)
|
|
||||||
}
|
|
||||||
if p.PrevKv {
|
|
||||||
if rr != nil && len(rr.KVs) != 0 {
|
|
||||||
resp.PrevKv = &rr.KVs[0]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
resp.Header.Revision = txn.Put(p.Key, val, leaseID)
|
|
||||||
trace.AddField(traceutil.Field{Key: "response_revision", Value: resp.Header.Revision})
|
|
||||||
return resp, trace, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *applierV3backend) DeleteRange(txn mvcc.TxnWrite, dr *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
|
func (a *applierV3backend) DeleteRange(txn mvcc.TxnWrite, dr *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
|
||||||
resp := &pb.DeleteRangeResponse{}
|
return DeleteRange(a.s.KV(), txn, dr)
|
||||||
resp.Header = &pb.ResponseHeader{}
|
|
||||||
end := mkGteRange(dr.RangeEnd)
|
|
||||||
|
|
||||||
if txn == nil {
|
|
||||||
txn = a.s.kv.Write(traceutil.TODO())
|
|
||||||
defer txn.End()
|
|
||||||
}
|
|
||||||
|
|
||||||
if dr.PrevKv {
|
|
||||||
rr, err := txn.Range(context.TODO(), dr.Key, end, mvcc.RangeOptions{})
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
if rr != nil {
|
|
||||||
resp.PrevKvs = make([]*mvccpb.KeyValue, len(rr.KVs))
|
|
||||||
for i := range rr.KVs {
|
|
||||||
resp.PrevKvs[i] = &rr.KVs[i]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
resp.Deleted, resp.Header.Revision = txn.DeleteRange(dr.Key, end)
|
|
||||||
return resp, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *applierV3backend) Range(ctx context.Context, txn mvcc.TxnRead, r *pb.RangeRequest) (*pb.RangeResponse, error) {
|
func (a *applierV3backend) Range(ctx context.Context, txn mvcc.TxnRead, r *pb.RangeRequest) (*pb.RangeResponse, error) {
|
||||||
trace := traceutil.Get(ctx)
|
return Range(ctx, a.s.Logger(), a.s.KV(), txn, r)
|
||||||
|
|
||||||
resp := &pb.RangeResponse{}
|
|
||||||
resp.Header = &pb.ResponseHeader{}
|
|
||||||
|
|
||||||
lg := a.s.Logger()
|
|
||||||
|
|
||||||
if txn == nil {
|
|
||||||
txn = a.s.kv.Read(mvcc.ConcurrentReadTxMode, trace)
|
|
||||||
defer txn.End()
|
|
||||||
}
|
|
||||||
|
|
||||||
limit := r.Limit
|
|
||||||
if r.SortOrder != pb.RangeRequest_NONE ||
|
|
||||||
r.MinModRevision != 0 || r.MaxModRevision != 0 ||
|
|
||||||
r.MinCreateRevision != 0 || r.MaxCreateRevision != 0 {
|
|
||||||
// fetch everything; sort and truncate afterwards
|
|
||||||
limit = 0
|
|
||||||
}
|
|
||||||
if limit > 0 {
|
|
||||||
// fetch one extra for 'more' flag
|
|
||||||
limit = limit + 1
|
|
||||||
}
|
|
||||||
|
|
||||||
ro := mvcc.RangeOptions{
|
|
||||||
Limit: limit,
|
|
||||||
Rev: r.Revision,
|
|
||||||
Count: r.CountOnly,
|
|
||||||
}
|
|
||||||
|
|
||||||
rr, err := txn.Range(ctx, r.Key, mkGteRange(r.RangeEnd), ro)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if r.MaxModRevision != 0 {
|
|
||||||
f := func(kv *mvccpb.KeyValue) bool { return kv.ModRevision > r.MaxModRevision }
|
|
||||||
pruneKVs(rr, f)
|
|
||||||
}
|
|
||||||
if r.MinModRevision != 0 {
|
|
||||||
f := func(kv *mvccpb.KeyValue) bool { return kv.ModRevision < r.MinModRevision }
|
|
||||||
pruneKVs(rr, f)
|
|
||||||
}
|
|
||||||
if r.MaxCreateRevision != 0 {
|
|
||||||
f := func(kv *mvccpb.KeyValue) bool { return kv.CreateRevision > r.MaxCreateRevision }
|
|
||||||
pruneKVs(rr, f)
|
|
||||||
}
|
|
||||||
if r.MinCreateRevision != 0 {
|
|
||||||
f := func(kv *mvccpb.KeyValue) bool { return kv.CreateRevision < r.MinCreateRevision }
|
|
||||||
pruneKVs(rr, f)
|
|
||||||
}
|
|
||||||
|
|
||||||
sortOrder := r.SortOrder
|
|
||||||
if r.SortTarget != pb.RangeRequest_KEY && sortOrder == pb.RangeRequest_NONE {
|
|
||||||
// Since current mvcc.Range implementation returns results
|
|
||||||
// sorted by keys in lexiographically ascending order,
|
|
||||||
// sort ASCEND by default only when target is not 'KEY'
|
|
||||||
sortOrder = pb.RangeRequest_ASCEND
|
|
||||||
} else if r.SortTarget == pb.RangeRequest_KEY && sortOrder == pb.RangeRequest_ASCEND {
|
|
||||||
// Since current mvcc.Range implementation returns results
|
|
||||||
// sorted by keys in lexiographically ascending order,
|
|
||||||
// don't re-sort when target is 'KEY' and order is ASCEND
|
|
||||||
sortOrder = pb.RangeRequest_NONE
|
|
||||||
}
|
|
||||||
if sortOrder != pb.RangeRequest_NONE {
|
|
||||||
var sorter sort.Interface
|
|
||||||
switch {
|
|
||||||
case r.SortTarget == pb.RangeRequest_KEY:
|
|
||||||
sorter = &kvSortByKey{&kvSort{rr.KVs}}
|
|
||||||
case r.SortTarget == pb.RangeRequest_VERSION:
|
|
||||||
sorter = &kvSortByVersion{&kvSort{rr.KVs}}
|
|
||||||
case r.SortTarget == pb.RangeRequest_CREATE:
|
|
||||||
sorter = &kvSortByCreate{&kvSort{rr.KVs}}
|
|
||||||
case r.SortTarget == pb.RangeRequest_MOD:
|
|
||||||
sorter = &kvSortByMod{&kvSort{rr.KVs}}
|
|
||||||
case r.SortTarget == pb.RangeRequest_VALUE:
|
|
||||||
sorter = &kvSortByValue{&kvSort{rr.KVs}}
|
|
||||||
default:
|
|
||||||
lg.Panic("unexpected sort target", zap.Int32("sort-target", int32(r.SortTarget)))
|
|
||||||
}
|
|
||||||
switch {
|
|
||||||
case sortOrder == pb.RangeRequest_ASCEND:
|
|
||||||
sort.Sort(sorter)
|
|
||||||
case sortOrder == pb.RangeRequest_DESCEND:
|
|
||||||
sort.Sort(sort.Reverse(sorter))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if r.Limit > 0 && len(rr.KVs) > int(r.Limit) {
|
|
||||||
rr.KVs = rr.KVs[:r.Limit]
|
|
||||||
resp.More = true
|
|
||||||
}
|
|
||||||
trace.Step("filter and sort the key-value pairs")
|
|
||||||
resp.Header.Revision = rr.Rev
|
|
||||||
resp.Count = int64(rr.Count)
|
|
||||||
resp.Kvs = make([]*mvccpb.KeyValue, len(rr.KVs))
|
|
||||||
for i := range rr.KVs {
|
|
||||||
if r.KeysOnly {
|
|
||||||
rr.KVs[i].Value = nil
|
|
||||||
}
|
|
||||||
resp.Kvs[i] = &rr.KVs[i]
|
|
||||||
}
|
|
||||||
trace.Step("assemble the response")
|
|
||||||
return resp, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *applierV3backend) Txn(ctx context.Context, rt *pb.TxnRequest) (*pb.TxnResponse, *traceutil.Trace, error) {
|
func (a *applierV3backend) Txn(ctx context.Context, rt *pb.TxnRequest) (*pb.TxnResponse, *traceutil.Trace, error) {
|
||||||
trace := traceutil.Get(ctx)
|
return Txn(ctx, a.s.Logger(), rt, a.s.Cfg.ExperimentalTxnModeWriteWithSharedBuffer, a.s.KV(), a.s.lessor)
|
||||||
if trace.IsEmpty() {
|
|
||||||
trace = traceutil.New("transaction", a.s.Logger())
|
|
||||||
ctx = context.WithValue(ctx, traceutil.TraceKey, trace)
|
|
||||||
}
|
|
||||||
isWrite := !isTxnReadonly(rt)
|
|
||||||
|
|
||||||
// When the transaction contains write operations, we use ReadTx instead of
|
|
||||||
// ConcurrentReadTx to avoid extra overhead of copying buffer.
|
|
||||||
var txn mvcc.TxnWrite
|
|
||||||
if isWrite && a.s.Cfg.ExperimentalTxnModeWriteWithSharedBuffer {
|
|
||||||
txn = mvcc.NewReadOnlyTxnWrite(a.s.KV().Read(mvcc.SharedBufReadTxMode, trace))
|
|
||||||
} else {
|
|
||||||
txn = mvcc.NewReadOnlyTxnWrite(a.s.KV().Read(mvcc.ConcurrentReadTxMode, trace))
|
|
||||||
}
|
|
||||||
|
|
||||||
var txnPath []bool
|
|
||||||
trace.StepWithFunction(
|
|
||||||
func() {
|
|
||||||
txnPath = compareToPath(txn, rt)
|
|
||||||
},
|
|
||||||
"compare",
|
|
||||||
)
|
|
||||||
|
|
||||||
if isWrite {
|
|
||||||
trace.AddField(traceutil.Field{Key: "read_only", Value: false})
|
|
||||||
if _, err := checkRequests(txn, rt, txnPath, a.checkPut); err != nil {
|
|
||||||
txn.End()
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if _, err := checkRequests(txn, rt, txnPath, a.checkRange); err != nil {
|
|
||||||
txn.End()
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
trace.Step("check requests")
|
|
||||||
txnResp, _ := newTxnResp(rt, txnPath)
|
|
||||||
|
|
||||||
// When executing mutable txn ops, etcd must hold the txn lock so
|
|
||||||
// readers do not see any intermediate results. Since writes are
|
|
||||||
// serialized on the raft loop, the revision in the read view will
|
|
||||||
// be the revision of the write txn.
|
|
||||||
if isWrite {
|
|
||||||
txn.End()
|
|
||||||
txn = a.s.KV().Write(trace)
|
|
||||||
}
|
|
||||||
a.applyTxn(ctx, txn, rt, txnPath, txnResp)
|
|
||||||
rev := txn.Rev()
|
|
||||||
if len(txn.Changes()) != 0 {
|
|
||||||
rev++
|
|
||||||
}
|
|
||||||
txn.End()
|
|
||||||
|
|
||||||
txnResp.Header.Revision = rev
|
|
||||||
trace.AddField(
|
|
||||||
traceutil.Field{Key: "number_of_response", Value: len(txnResp.Responses)},
|
|
||||||
traceutil.Field{Key: "response_revision", Value: txnResp.Header.Revision},
|
|
||||||
)
|
|
||||||
return txnResp, trace, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// newTxnResp allocates a txn response for a txn request given a path.
|
|
||||||
func newTxnResp(rt *pb.TxnRequest, txnPath []bool) (txnResp *pb.TxnResponse, txnCount int) {
|
|
||||||
reqs := rt.Success
|
|
||||||
if !txnPath[0] {
|
|
||||||
reqs = rt.Failure
|
|
||||||
}
|
|
||||||
resps := make([]*pb.ResponseOp, len(reqs))
|
|
||||||
txnResp = &pb.TxnResponse{
|
|
||||||
Responses: resps,
|
|
||||||
Succeeded: txnPath[0],
|
|
||||||
Header: &pb.ResponseHeader{},
|
|
||||||
}
|
|
||||||
for i, req := range reqs {
|
|
||||||
switch tv := req.Request.(type) {
|
|
||||||
case *pb.RequestOp_RequestRange:
|
|
||||||
resps[i] = &pb.ResponseOp{Response: &pb.ResponseOp_ResponseRange{}}
|
|
||||||
case *pb.RequestOp_RequestPut:
|
|
||||||
resps[i] = &pb.ResponseOp{Response: &pb.ResponseOp_ResponsePut{}}
|
|
||||||
case *pb.RequestOp_RequestDeleteRange:
|
|
||||||
resps[i] = &pb.ResponseOp{Response: &pb.ResponseOp_ResponseDeleteRange{}}
|
|
||||||
case *pb.RequestOp_RequestTxn:
|
|
||||||
resp, txns := newTxnResp(tv.RequestTxn, txnPath[1:])
|
|
||||||
resps[i] = &pb.ResponseOp{Response: &pb.ResponseOp_ResponseTxn{ResponseTxn: resp}}
|
|
||||||
txnPath = txnPath[1+txns:]
|
|
||||||
txnCount += txns + 1
|
|
||||||
default:
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return txnResp, txnCount
|
|
||||||
}
|
|
||||||
|
|
||||||
func compareToPath(rv mvcc.ReadView, rt *pb.TxnRequest) []bool {
|
|
||||||
txnPath := make([]bool, 1)
|
|
||||||
ops := rt.Success
|
|
||||||
if txnPath[0] = applyCompares(rv, rt.Compare); !txnPath[0] {
|
|
||||||
ops = rt.Failure
|
|
||||||
}
|
|
||||||
for _, op := range ops {
|
|
||||||
tv, ok := op.Request.(*pb.RequestOp_RequestTxn)
|
|
||||||
if !ok || tv.RequestTxn == nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
txnPath = append(txnPath, compareToPath(rv, tv.RequestTxn)...)
|
|
||||||
}
|
|
||||||
return txnPath
|
|
||||||
}
|
|
||||||
|
|
||||||
func applyCompares(rv mvcc.ReadView, cmps []*pb.Compare) bool {
|
|
||||||
for _, c := range cmps {
|
|
||||||
if !applyCompare(rv, c) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// applyCompare applies the compare request.
|
|
||||||
// If the comparison succeeds, it returns true. Otherwise, returns false.
|
|
||||||
func applyCompare(rv mvcc.ReadView, c *pb.Compare) bool {
|
|
||||||
// TODO: possible optimizations
|
|
||||||
// * chunk reads for large ranges to conserve memory
|
|
||||||
// * rewrite rules for common patterns:
|
|
||||||
// ex. "[a, b) createrev > 0" => "limit 1 /\ kvs > 0"
|
|
||||||
// * caching
|
|
||||||
rr, err := rv.Range(context.TODO(), c.Key, mkGteRange(c.RangeEnd), mvcc.RangeOptions{})
|
|
||||||
if err != nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
if len(rr.KVs) == 0 {
|
|
||||||
if c.Target == pb.Compare_VALUE {
|
|
||||||
// Always fail if comparing a value on a key/keys that doesn't exist;
|
|
||||||
// nil == empty string in grpc; no way to represent missing value
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return compareKV(c, mvccpb.KeyValue{})
|
|
||||||
}
|
|
||||||
for _, kv := range rr.KVs {
|
|
||||||
if !compareKV(c, kv) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
func compareKV(c *pb.Compare, ckv mvccpb.KeyValue) bool {
|
|
||||||
var result int
|
|
||||||
rev := int64(0)
|
|
||||||
switch c.Target {
|
|
||||||
case pb.Compare_VALUE:
|
|
||||||
v := []byte{}
|
|
||||||
if tv, _ := c.TargetUnion.(*pb.Compare_Value); tv != nil {
|
|
||||||
v = tv.Value
|
|
||||||
}
|
|
||||||
result = bytes.Compare(ckv.Value, v)
|
|
||||||
case pb.Compare_CREATE:
|
|
||||||
if tv, _ := c.TargetUnion.(*pb.Compare_CreateRevision); tv != nil {
|
|
||||||
rev = tv.CreateRevision
|
|
||||||
}
|
|
||||||
result = compareInt64(ckv.CreateRevision, rev)
|
|
||||||
case pb.Compare_MOD:
|
|
||||||
if tv, _ := c.TargetUnion.(*pb.Compare_ModRevision); tv != nil {
|
|
||||||
rev = tv.ModRevision
|
|
||||||
}
|
|
||||||
result = compareInt64(ckv.ModRevision, rev)
|
|
||||||
case pb.Compare_VERSION:
|
|
||||||
if tv, _ := c.TargetUnion.(*pb.Compare_Version); tv != nil {
|
|
||||||
rev = tv.Version
|
|
||||||
}
|
|
||||||
result = compareInt64(ckv.Version, rev)
|
|
||||||
case pb.Compare_LEASE:
|
|
||||||
if tv, _ := c.TargetUnion.(*pb.Compare_Lease); tv != nil {
|
|
||||||
rev = tv.Lease
|
|
||||||
}
|
|
||||||
result = compareInt64(ckv.Lease, rev)
|
|
||||||
}
|
|
||||||
switch c.Result {
|
|
||||||
case pb.Compare_EQUAL:
|
|
||||||
return result == 0
|
|
||||||
case pb.Compare_NOT_EQUAL:
|
|
||||||
return result != 0
|
|
||||||
case pb.Compare_GREATER:
|
|
||||||
return result > 0
|
|
||||||
case pb.Compare_LESS:
|
|
||||||
return result < 0
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *applierV3backend) applyTxn(ctx context.Context, txn mvcc.TxnWrite, rt *pb.TxnRequest, txnPath []bool, tresp *pb.TxnResponse) (txns int) {
|
|
||||||
trace := traceutil.Get(ctx)
|
|
||||||
reqs := rt.Success
|
|
||||||
if !txnPath[0] {
|
|
||||||
reqs = rt.Failure
|
|
||||||
}
|
|
||||||
|
|
||||||
lg := a.s.Logger()
|
|
||||||
for i, req := range reqs {
|
|
||||||
respi := tresp.Responses[i].Response
|
|
||||||
switch tv := req.Request.(type) {
|
|
||||||
case *pb.RequestOp_RequestRange:
|
|
||||||
trace.StartSubTrace(
|
|
||||||
traceutil.Field{Key: "req_type", Value: "range"},
|
|
||||||
traceutil.Field{Key: "range_begin", Value: string(tv.RequestRange.Key)},
|
|
||||||
traceutil.Field{Key: "range_end", Value: string(tv.RequestRange.RangeEnd)})
|
|
||||||
resp, err := a.Range(ctx, txn, tv.RequestRange)
|
|
||||||
if err != nil {
|
|
||||||
lg.Panic("unexpected error during txn", zap.Error(err))
|
|
||||||
}
|
|
||||||
respi.(*pb.ResponseOp_ResponseRange).ResponseRange = resp
|
|
||||||
trace.StopSubTrace()
|
|
||||||
case *pb.RequestOp_RequestPut:
|
|
||||||
trace.StartSubTrace(
|
|
||||||
traceutil.Field{Key: "req_type", Value: "put"},
|
|
||||||
traceutil.Field{Key: "key", Value: string(tv.RequestPut.Key)},
|
|
||||||
traceutil.Field{Key: "req_size", Value: tv.RequestPut.Size()})
|
|
||||||
resp, _, err := a.Put(ctx, txn, tv.RequestPut)
|
|
||||||
if err != nil {
|
|
||||||
lg.Panic("unexpected error during txn", zap.Error(err))
|
|
||||||
}
|
|
||||||
respi.(*pb.ResponseOp_ResponsePut).ResponsePut = resp
|
|
||||||
trace.StopSubTrace()
|
|
||||||
case *pb.RequestOp_RequestDeleteRange:
|
|
||||||
resp, err := a.DeleteRange(txn, tv.RequestDeleteRange)
|
|
||||||
if err != nil {
|
|
||||||
lg.Panic("unexpected error during txn", zap.Error(err))
|
|
||||||
}
|
|
||||||
respi.(*pb.ResponseOp_ResponseDeleteRange).ResponseDeleteRange = resp
|
|
||||||
case *pb.RequestOp_RequestTxn:
|
|
||||||
resp := respi.(*pb.ResponseOp_ResponseTxn).ResponseTxn
|
|
||||||
applyTxns := a.applyTxn(ctx, txn, tv.RequestTxn, txnPath[1:], resp)
|
|
||||||
txns += applyTxns + 1
|
|
||||||
txnPath = txnPath[applyTxns+1:]
|
|
||||||
default:
|
|
||||||
// empty union
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return txns
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *applierV3backend) Compaction(compaction *pb.CompactionRequest) (*pb.CompactionResponse, <-chan struct{}, *traceutil.Trace, error) {
|
func (a *applierV3backend) Compaction(compaction *pb.CompactionRequest) (*pb.CompactionResponse, <-chan struct{}, *traceutil.Trace, error) {
|
||||||
@ -1007,131 +577,6 @@ func (a *quotaApplierV3) LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantRes
|
|||||||
return resp, err
|
return resp, err
|
||||||
}
|
}
|
||||||
|
|
||||||
type kvSort struct{ kvs []mvccpb.KeyValue }
|
|
||||||
|
|
||||||
func (s *kvSort) Swap(i, j int) {
|
|
||||||
t := s.kvs[i]
|
|
||||||
s.kvs[i] = s.kvs[j]
|
|
||||||
s.kvs[j] = t
|
|
||||||
}
|
|
||||||
func (s *kvSort) Len() int { return len(s.kvs) }
|
|
||||||
|
|
||||||
type kvSortByKey struct{ *kvSort }
|
|
||||||
|
|
||||||
func (s *kvSortByKey) Less(i, j int) bool {
|
|
||||||
return bytes.Compare(s.kvs[i].Key, s.kvs[j].Key) < 0
|
|
||||||
}
|
|
||||||
|
|
||||||
type kvSortByVersion struct{ *kvSort }
|
|
||||||
|
|
||||||
func (s *kvSortByVersion) Less(i, j int) bool {
|
|
||||||
return (s.kvs[i].Version - s.kvs[j].Version) < 0
|
|
||||||
}
|
|
||||||
|
|
||||||
type kvSortByCreate struct{ *kvSort }
|
|
||||||
|
|
||||||
func (s *kvSortByCreate) Less(i, j int) bool {
|
|
||||||
return (s.kvs[i].CreateRevision - s.kvs[j].CreateRevision) < 0
|
|
||||||
}
|
|
||||||
|
|
||||||
type kvSortByMod struct{ *kvSort }
|
|
||||||
|
|
||||||
func (s *kvSortByMod) Less(i, j int) bool {
|
|
||||||
return (s.kvs[i].ModRevision - s.kvs[j].ModRevision) < 0
|
|
||||||
}
|
|
||||||
|
|
||||||
type kvSortByValue struct{ *kvSort }
|
|
||||||
|
|
||||||
func (s *kvSortByValue) Less(i, j int) bool {
|
|
||||||
return bytes.Compare(s.kvs[i].Value, s.kvs[j].Value) < 0
|
|
||||||
}
|
|
||||||
|
|
||||||
func checkRequests(rv mvcc.ReadView, rt *pb.TxnRequest, txnPath []bool, f checkReqFunc) (int, error) {
|
|
||||||
txnCount := 0
|
|
||||||
reqs := rt.Success
|
|
||||||
if !txnPath[0] {
|
|
||||||
reqs = rt.Failure
|
|
||||||
}
|
|
||||||
for _, req := range reqs {
|
|
||||||
if tv, ok := req.Request.(*pb.RequestOp_RequestTxn); ok && tv.RequestTxn != nil {
|
|
||||||
txns, err := checkRequests(rv, tv.RequestTxn, txnPath[1:], f)
|
|
||||||
if err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
txnCount += txns + 1
|
|
||||||
txnPath = txnPath[txns+1:]
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if err := f(rv, req); err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return txnCount, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *applierV3backend) checkRequestPut(rv mvcc.ReadView, reqOp *pb.RequestOp) error {
|
|
||||||
tv, ok := reqOp.Request.(*pb.RequestOp_RequestPut)
|
|
||||||
if !ok || tv.RequestPut == nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
req := tv.RequestPut
|
|
||||||
if req.IgnoreValue || req.IgnoreLease {
|
|
||||||
// expects previous key-value, error if not exist
|
|
||||||
rr, err := rv.Range(context.TODO(), req.Key, nil, mvcc.RangeOptions{})
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if rr == nil || len(rr.KVs) == 0 {
|
|
||||||
return ErrKeyNotFound
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if lease.LeaseID(req.Lease) != lease.NoLease {
|
|
||||||
if l := a.s.lessor.Lookup(lease.LeaseID(req.Lease)); l == nil {
|
|
||||||
return lease.ErrLeaseNotFound
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *applierV3backend) checkRequestRange(rv mvcc.ReadView, reqOp *pb.RequestOp) error {
|
|
||||||
tv, ok := reqOp.Request.(*pb.RequestOp_RequestRange)
|
|
||||||
if !ok || tv.RequestRange == nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
req := tv.RequestRange
|
|
||||||
switch {
|
|
||||||
case req.Revision == 0:
|
|
||||||
return nil
|
|
||||||
case req.Revision > rv.Rev():
|
|
||||||
return mvcc.ErrFutureRev
|
|
||||||
case req.Revision < rv.FirstRev():
|
|
||||||
return mvcc.ErrCompacted
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func compareInt64(a, b int64) int {
|
|
||||||
switch {
|
|
||||||
case a < b:
|
|
||||||
return -1
|
|
||||||
case a > b:
|
|
||||||
return 1
|
|
||||||
default:
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// mkGteRange determines if the range end is a >= range. This works around grpc
|
|
||||||
// sending empty byte strings as nil; >= is encoded in the range end as '\0'.
|
|
||||||
// If it is a GTE range, then []byte{} is returned to indicate the empty byte
|
|
||||||
// string (vs nil being no byte string).
|
|
||||||
func mkGteRange(rangeEnd []byte) []byte {
|
|
||||||
if len(rangeEnd) == 1 && rangeEnd[0] == 0 {
|
|
||||||
return []byte{}
|
|
||||||
}
|
|
||||||
return rangeEnd
|
|
||||||
}
|
|
||||||
|
|
||||||
func noSideEffect(r *pb.InternalRaftRequest) bool {
|
func noSideEffect(r *pb.InternalRaftRequest) bool {
|
||||||
return r.Range != nil || r.AuthUserGet != nil || r.AuthRoleGet != nil || r.AuthStatus != nil
|
return r.Range != nil || r.AuthUserGet != nil || r.AuthRoleGet != nil || r.AuthStatus != nil
|
||||||
}
|
}
|
||||||
@ -1154,17 +599,6 @@ func removeNeedlessRangeReqs(txn *pb.TxnRequest) {
|
|||||||
txn.Failure = f(txn.Failure)
|
txn.Failure = f(txn.Failure)
|
||||||
}
|
}
|
||||||
|
|
||||||
func pruneKVs(rr *mvcc.RangeResult, isPrunable func(*mvccpb.KeyValue) bool) {
|
|
||||||
j := 0
|
|
||||||
for i := range rr.KVs {
|
|
||||||
rr.KVs[j] = rr.KVs[i]
|
|
||||||
if !isPrunable(&rr.KVs[i]) {
|
|
||||||
j++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
rr.KVs = rr.KVs[:j]
|
|
||||||
}
|
|
||||||
|
|
||||||
func newHeader(s *EtcdServer) *pb.ResponseHeader {
|
func newHeader(s *EtcdServer) *pb.ResponseHeader {
|
||||||
return &pb.ResponseHeader{
|
return &pb.ResponseHeader{
|
||||||
ClusterId: uint64(s.Cluster().ID()),
|
ClusterId: uint64(s.Cluster().ID()),
|
||||||
|
@ -252,8 +252,6 @@ type EtcdServer struct {
|
|||||||
|
|
||||||
// applyV3 is the applier with auth and quotas
|
// applyV3 is the applier with auth and quotas
|
||||||
applyV3 applierV3
|
applyV3 applierV3
|
||||||
// applyV3Base is the core applier without auth or quotas
|
|
||||||
applyV3Base applierV3
|
|
||||||
// applyV3Internal is the applier for internal request
|
// applyV3Internal is the applier for internal request
|
||||||
applyV3Internal applierV3Internal
|
applyV3Internal applierV3Internal
|
||||||
applyWait wait.WaitTime
|
applyWait wait.WaitTime
|
||||||
@ -392,7 +390,6 @@ func NewServer(cfg config.ServerConfig) (srv *EtcdServer, err error) {
|
|||||||
srv.compactor.Run()
|
srv.compactor.Run()
|
||||||
}
|
}
|
||||||
|
|
||||||
srv.applyV3Base = srv.newApplierV3Backend()
|
|
||||||
srv.applyV3Internal = srv.newApplierV3Internal()
|
srv.applyV3Internal = srv.newApplierV3Internal()
|
||||||
if err = srv.restoreAlarms(); err != nil {
|
if err = srv.restoreAlarms(); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
597
server/etcdserver/txn.go
Normal file
597
server/etcdserver/txn.go
Normal file
@ -0,0 +1,597 @@
|
|||||||
|
// Copyright 2022 The etcd Authors
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package etcdserver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"sort"
|
||||||
|
|
||||||
|
pb "go.etcd.io/etcd/api/v3/etcdserverpb"
|
||||||
|
"go.etcd.io/etcd/api/v3/mvccpb"
|
||||||
|
"go.etcd.io/etcd/pkg/v3/traceutil"
|
||||||
|
"go.etcd.io/etcd/server/v3/lease"
|
||||||
|
"go.etcd.io/etcd/server/v3/storage/mvcc"
|
||||||
|
"go.uber.org/zap"
|
||||||
|
)
|
||||||
|
|
||||||
|
func Put(ctx context.Context, lg *zap.Logger, lessor lease.Lessor, kv mvcc.KV, txn mvcc.TxnWrite, p *pb.PutRequest) (resp *pb.PutResponse, trace *traceutil.Trace, err error) {
|
||||||
|
resp = &pb.PutResponse{}
|
||||||
|
resp.Header = &pb.ResponseHeader{}
|
||||||
|
trace = traceutil.Get(ctx)
|
||||||
|
// create put tracing if the trace in context is empty
|
||||||
|
if trace.IsEmpty() {
|
||||||
|
trace = traceutil.New("put",
|
||||||
|
lg,
|
||||||
|
traceutil.Field{Key: "key", Value: string(p.Key)},
|
||||||
|
traceutil.Field{Key: "req_size", Value: p.Size()},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
val, leaseID := p.Value, lease.LeaseID(p.Lease)
|
||||||
|
if txn == nil {
|
||||||
|
if leaseID != lease.NoLease {
|
||||||
|
if l := lessor.Lookup(leaseID); l == nil {
|
||||||
|
return nil, nil, lease.ErrLeaseNotFound
|
||||||
|
}
|
||||||
|
}
|
||||||
|
txn = kv.Write(trace)
|
||||||
|
defer txn.End()
|
||||||
|
}
|
||||||
|
|
||||||
|
var rr *mvcc.RangeResult
|
||||||
|
if p.IgnoreValue || p.IgnoreLease || p.PrevKv {
|
||||||
|
trace.StepWithFunction(func() {
|
||||||
|
rr, err = txn.Range(context.TODO(), p.Key, nil, mvcc.RangeOptions{})
|
||||||
|
}, "get previous kv pair")
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if p.IgnoreValue || p.IgnoreLease {
|
||||||
|
if rr == nil || len(rr.KVs) == 0 {
|
||||||
|
// ignore_{lease,value} flag expects previous key-value pair
|
||||||
|
return nil, nil, ErrKeyNotFound
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if p.IgnoreValue {
|
||||||
|
val = rr.KVs[0].Value
|
||||||
|
}
|
||||||
|
if p.IgnoreLease {
|
||||||
|
leaseID = lease.LeaseID(rr.KVs[0].Lease)
|
||||||
|
}
|
||||||
|
if p.PrevKv {
|
||||||
|
if rr != nil && len(rr.KVs) != 0 {
|
||||||
|
resp.PrevKv = &rr.KVs[0]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resp.Header.Revision = txn.Put(p.Key, val, leaseID)
|
||||||
|
trace.AddField(traceutil.Field{Key: "response_revision", Value: resp.Header.Revision})
|
||||||
|
return resp, trace, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func DeleteRange(kv mvcc.KV, txn mvcc.TxnWrite, dr *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
|
||||||
|
resp := &pb.DeleteRangeResponse{}
|
||||||
|
resp.Header = &pb.ResponseHeader{}
|
||||||
|
end := mkGteRange(dr.RangeEnd)
|
||||||
|
|
||||||
|
if txn == nil {
|
||||||
|
txn = kv.Write(traceutil.TODO())
|
||||||
|
defer txn.End()
|
||||||
|
}
|
||||||
|
|
||||||
|
if dr.PrevKv {
|
||||||
|
rr, err := txn.Range(context.TODO(), dr.Key, end, mvcc.RangeOptions{})
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if rr != nil {
|
||||||
|
resp.PrevKvs = make([]*mvccpb.KeyValue, len(rr.KVs))
|
||||||
|
for i := range rr.KVs {
|
||||||
|
resp.PrevKvs[i] = &rr.KVs[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resp.Deleted, resp.Header.Revision = txn.DeleteRange(dr.Key, end)
|
||||||
|
return resp, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func Range(ctx context.Context, lg *zap.Logger, kv mvcc.KV, txn mvcc.TxnRead, r *pb.RangeRequest) (*pb.RangeResponse, error) {
|
||||||
|
trace := traceutil.Get(ctx)
|
||||||
|
|
||||||
|
resp := &pb.RangeResponse{}
|
||||||
|
resp.Header = &pb.ResponseHeader{}
|
||||||
|
|
||||||
|
if txn == nil {
|
||||||
|
txn = kv.Read(mvcc.ConcurrentReadTxMode, trace)
|
||||||
|
defer txn.End()
|
||||||
|
}
|
||||||
|
|
||||||
|
limit := r.Limit
|
||||||
|
if r.SortOrder != pb.RangeRequest_NONE ||
|
||||||
|
r.MinModRevision != 0 || r.MaxModRevision != 0 ||
|
||||||
|
r.MinCreateRevision != 0 || r.MaxCreateRevision != 0 {
|
||||||
|
// fetch everything; sort and truncate afterwards
|
||||||
|
limit = 0
|
||||||
|
}
|
||||||
|
if limit > 0 {
|
||||||
|
// fetch one extra for 'more' flag
|
||||||
|
limit = limit + 1
|
||||||
|
}
|
||||||
|
|
||||||
|
ro := mvcc.RangeOptions{
|
||||||
|
Limit: limit,
|
||||||
|
Rev: r.Revision,
|
||||||
|
Count: r.CountOnly,
|
||||||
|
}
|
||||||
|
|
||||||
|
rr, err := txn.Range(ctx, r.Key, mkGteRange(r.RangeEnd), ro)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.MaxModRevision != 0 {
|
||||||
|
f := func(kv *mvccpb.KeyValue) bool { return kv.ModRevision > r.MaxModRevision }
|
||||||
|
pruneKVs(rr, f)
|
||||||
|
}
|
||||||
|
if r.MinModRevision != 0 {
|
||||||
|
f := func(kv *mvccpb.KeyValue) bool { return kv.ModRevision < r.MinModRevision }
|
||||||
|
pruneKVs(rr, f)
|
||||||
|
}
|
||||||
|
if r.MaxCreateRevision != 0 {
|
||||||
|
f := func(kv *mvccpb.KeyValue) bool { return kv.CreateRevision > r.MaxCreateRevision }
|
||||||
|
pruneKVs(rr, f)
|
||||||
|
}
|
||||||
|
if r.MinCreateRevision != 0 {
|
||||||
|
f := func(kv *mvccpb.KeyValue) bool { return kv.CreateRevision < r.MinCreateRevision }
|
||||||
|
pruneKVs(rr, f)
|
||||||
|
}
|
||||||
|
|
||||||
|
sortOrder := r.SortOrder
|
||||||
|
if r.SortTarget != pb.RangeRequest_KEY && sortOrder == pb.RangeRequest_NONE {
|
||||||
|
// Since current mvcc.Range implementation returns results
|
||||||
|
// sorted by keys in lexiographically ascending order,
|
||||||
|
// sort ASCEND by default only when target is not 'KEY'
|
||||||
|
sortOrder = pb.RangeRequest_ASCEND
|
||||||
|
} else if r.SortTarget == pb.RangeRequest_KEY && sortOrder == pb.RangeRequest_ASCEND {
|
||||||
|
// Since current mvcc.Range implementation returns results
|
||||||
|
// sorted by keys in lexiographically ascending order,
|
||||||
|
// don't re-sort when target is 'KEY' and order is ASCEND
|
||||||
|
sortOrder = pb.RangeRequest_NONE
|
||||||
|
}
|
||||||
|
if sortOrder != pb.RangeRequest_NONE {
|
||||||
|
var sorter sort.Interface
|
||||||
|
switch {
|
||||||
|
case r.SortTarget == pb.RangeRequest_KEY:
|
||||||
|
sorter = &kvSortByKey{&kvSort{rr.KVs}}
|
||||||
|
case r.SortTarget == pb.RangeRequest_VERSION:
|
||||||
|
sorter = &kvSortByVersion{&kvSort{rr.KVs}}
|
||||||
|
case r.SortTarget == pb.RangeRequest_CREATE:
|
||||||
|
sorter = &kvSortByCreate{&kvSort{rr.KVs}}
|
||||||
|
case r.SortTarget == pb.RangeRequest_MOD:
|
||||||
|
sorter = &kvSortByMod{&kvSort{rr.KVs}}
|
||||||
|
case r.SortTarget == pb.RangeRequest_VALUE:
|
||||||
|
sorter = &kvSortByValue{&kvSort{rr.KVs}}
|
||||||
|
default:
|
||||||
|
lg.Panic("unexpected sort target", zap.Int32("sort-target", int32(r.SortTarget)))
|
||||||
|
}
|
||||||
|
switch {
|
||||||
|
case sortOrder == pb.RangeRequest_ASCEND:
|
||||||
|
sort.Sort(sorter)
|
||||||
|
case sortOrder == pb.RangeRequest_DESCEND:
|
||||||
|
sort.Sort(sort.Reverse(sorter))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.Limit > 0 && len(rr.KVs) > int(r.Limit) {
|
||||||
|
rr.KVs = rr.KVs[:r.Limit]
|
||||||
|
resp.More = true
|
||||||
|
}
|
||||||
|
trace.Step("filter and sort the key-value pairs")
|
||||||
|
resp.Header.Revision = rr.Rev
|
||||||
|
resp.Count = int64(rr.Count)
|
||||||
|
resp.Kvs = make([]*mvccpb.KeyValue, len(rr.KVs))
|
||||||
|
for i := range rr.KVs {
|
||||||
|
if r.KeysOnly {
|
||||||
|
rr.KVs[i].Value = nil
|
||||||
|
}
|
||||||
|
resp.Kvs[i] = &rr.KVs[i]
|
||||||
|
}
|
||||||
|
trace.Step("assemble the response")
|
||||||
|
return resp, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func Txn(ctx context.Context, lg *zap.Logger, rt *pb.TxnRequest, txnModeWriteWithSharedBuffer bool, kv mvcc.KV, lessor lease.Lessor) (*pb.TxnResponse, *traceutil.Trace, error) {
|
||||||
|
trace := traceutil.Get(ctx)
|
||||||
|
if trace.IsEmpty() {
|
||||||
|
trace = traceutil.New("transaction", lg)
|
||||||
|
ctx = context.WithValue(ctx, traceutil.TraceKey, trace)
|
||||||
|
}
|
||||||
|
isWrite := !isTxnReadonly(rt)
|
||||||
|
|
||||||
|
// When the transaction contains write operations, we use ReadTx instead of
|
||||||
|
// ConcurrentReadTx to avoid extra overhead of copying buffer.
|
||||||
|
var txn mvcc.TxnWrite
|
||||||
|
if isWrite && txnModeWriteWithSharedBuffer /*a.s.Cfg.ExperimentalTxnModeWriteWithSharedBuffer*/ {
|
||||||
|
txn = mvcc.NewReadOnlyTxnWrite(kv.Read(mvcc.SharedBufReadTxMode, trace))
|
||||||
|
} else {
|
||||||
|
txn = mvcc.NewReadOnlyTxnWrite(kv.Read(mvcc.ConcurrentReadTxMode, trace))
|
||||||
|
}
|
||||||
|
|
||||||
|
var txnPath []bool
|
||||||
|
trace.StepWithFunction(
|
||||||
|
func() {
|
||||||
|
txnPath = compareToPath(txn, rt)
|
||||||
|
},
|
||||||
|
"compare",
|
||||||
|
)
|
||||||
|
|
||||||
|
if isWrite {
|
||||||
|
trace.AddField(traceutil.Field{Key: "read_only", Value: false})
|
||||||
|
if _, err := checkRequests(txn, rt, txnPath,
|
||||||
|
func(rv mvcc.ReadView, ro *pb.RequestOp) error { return checkRequestPut(rv, lessor, ro) }); err != nil {
|
||||||
|
txn.End()
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if _, err := checkRequests(txn, rt, txnPath, checkRequestRange); err != nil {
|
||||||
|
txn.End()
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
trace.Step("check requests")
|
||||||
|
txnResp, _ := newTxnResp(rt, txnPath)
|
||||||
|
|
||||||
|
// When executing mutable txn ops, etcd must hold the txn lock so
|
||||||
|
// readers do not see any intermediate results. Since writes are
|
||||||
|
// serialized on the raft loop, the revision in the read view will
|
||||||
|
// be the revision of the write txn.
|
||||||
|
if isWrite {
|
||||||
|
txn.End()
|
||||||
|
txn = kv.Write(trace)
|
||||||
|
}
|
||||||
|
applyTxn(ctx, lg, kv, lessor, txn, rt, txnPath, txnResp)
|
||||||
|
rev := txn.Rev()
|
||||||
|
if len(txn.Changes()) != 0 {
|
||||||
|
rev++
|
||||||
|
}
|
||||||
|
txn.End()
|
||||||
|
|
||||||
|
txnResp.Header.Revision = rev
|
||||||
|
trace.AddField(
|
||||||
|
traceutil.Field{Key: "number_of_response", Value: len(txnResp.Responses)},
|
||||||
|
traceutil.Field{Key: "response_revision", Value: txnResp.Header.Revision},
|
||||||
|
)
|
||||||
|
return txnResp, trace, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// newTxnResp allocates a txn response for a txn request given a path.
|
||||||
|
func newTxnResp(rt *pb.TxnRequest, txnPath []bool) (txnResp *pb.TxnResponse, txnCount int) {
|
||||||
|
reqs := rt.Success
|
||||||
|
if !txnPath[0] {
|
||||||
|
reqs = rt.Failure
|
||||||
|
}
|
||||||
|
resps := make([]*pb.ResponseOp, len(reqs))
|
||||||
|
txnResp = &pb.TxnResponse{
|
||||||
|
Responses: resps,
|
||||||
|
Succeeded: txnPath[0],
|
||||||
|
Header: &pb.ResponseHeader{},
|
||||||
|
}
|
||||||
|
for i, req := range reqs {
|
||||||
|
switch tv := req.Request.(type) {
|
||||||
|
case *pb.RequestOp_RequestRange:
|
||||||
|
resps[i] = &pb.ResponseOp{Response: &pb.ResponseOp_ResponseRange{}}
|
||||||
|
case *pb.RequestOp_RequestPut:
|
||||||
|
resps[i] = &pb.ResponseOp{Response: &pb.ResponseOp_ResponsePut{}}
|
||||||
|
case *pb.RequestOp_RequestDeleteRange:
|
||||||
|
resps[i] = &pb.ResponseOp{Response: &pb.ResponseOp_ResponseDeleteRange{}}
|
||||||
|
case *pb.RequestOp_RequestTxn:
|
||||||
|
resp, txns := newTxnResp(tv.RequestTxn, txnPath[1:])
|
||||||
|
resps[i] = &pb.ResponseOp{Response: &pb.ResponseOp_ResponseTxn{ResponseTxn: resp}}
|
||||||
|
txnPath = txnPath[1+txns:]
|
||||||
|
txnCount += txns + 1
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return txnResp, txnCount
|
||||||
|
}
|
||||||
|
|
||||||
|
func applyTxn(ctx context.Context, lg *zap.Logger, kv mvcc.KV, lessor lease.Lessor, txn mvcc.TxnWrite, rt *pb.TxnRequest, txnPath []bool, tresp *pb.TxnResponse) (txns int) {
|
||||||
|
trace := traceutil.Get(ctx)
|
||||||
|
reqs := rt.Success
|
||||||
|
if !txnPath[0] {
|
||||||
|
reqs = rt.Failure
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, req := range reqs {
|
||||||
|
respi := tresp.Responses[i].Response
|
||||||
|
switch tv := req.Request.(type) {
|
||||||
|
case *pb.RequestOp_RequestRange:
|
||||||
|
trace.StartSubTrace(
|
||||||
|
traceutil.Field{Key: "req_type", Value: "range"},
|
||||||
|
traceutil.Field{Key: "range_begin", Value: string(tv.RequestRange.Key)},
|
||||||
|
traceutil.Field{Key: "range_end", Value: string(tv.RequestRange.RangeEnd)})
|
||||||
|
resp, err := Range(ctx, lg, kv, txn, tv.RequestRange)
|
||||||
|
if err != nil {
|
||||||
|
lg.Panic("unexpected error during txn", zap.Error(err))
|
||||||
|
}
|
||||||
|
respi.(*pb.ResponseOp_ResponseRange).ResponseRange = resp
|
||||||
|
trace.StopSubTrace()
|
||||||
|
case *pb.RequestOp_RequestPut:
|
||||||
|
trace.StartSubTrace(
|
||||||
|
traceutil.Field{Key: "req_type", Value: "put"},
|
||||||
|
traceutil.Field{Key: "key", Value: string(tv.RequestPut.Key)},
|
||||||
|
traceutil.Field{Key: "req_size", Value: tv.RequestPut.Size()})
|
||||||
|
resp, _, err := Put(ctx, lg, lessor, kv, txn, tv.RequestPut)
|
||||||
|
if err != nil {
|
||||||
|
lg.Panic("unexpected error during txn", zap.Error(err))
|
||||||
|
}
|
||||||
|
respi.(*pb.ResponseOp_ResponsePut).ResponsePut = resp
|
||||||
|
trace.StopSubTrace()
|
||||||
|
case *pb.RequestOp_RequestDeleteRange:
|
||||||
|
resp, err := DeleteRange(kv, txn, tv.RequestDeleteRange)
|
||||||
|
if err != nil {
|
||||||
|
lg.Panic("unexpected error during txn", zap.Error(err))
|
||||||
|
}
|
||||||
|
respi.(*pb.ResponseOp_ResponseDeleteRange).ResponseDeleteRange = resp
|
||||||
|
case *pb.RequestOp_RequestTxn:
|
||||||
|
resp := respi.(*pb.ResponseOp_ResponseTxn).ResponseTxn
|
||||||
|
applyTxns := applyTxn(ctx, lg, kv, lessor, txn, tv.RequestTxn, txnPath[1:], resp)
|
||||||
|
txns += applyTxns + 1
|
||||||
|
txnPath = txnPath[applyTxns+1:]
|
||||||
|
default:
|
||||||
|
// empty union
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return txns
|
||||||
|
}
|
||||||
|
|
||||||
|
//---------------------------------------------------------
|
||||||
|
|
||||||
|
type checkReqFunc func(mvcc.ReadView, *pb.RequestOp) error
|
||||||
|
|
||||||
|
func checkRequestPut(rv mvcc.ReadView, lessor lease.Lessor, reqOp *pb.RequestOp) error {
|
||||||
|
tv, ok := reqOp.Request.(*pb.RequestOp_RequestPut)
|
||||||
|
if !ok || tv.RequestPut == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
req := tv.RequestPut
|
||||||
|
if req.IgnoreValue || req.IgnoreLease {
|
||||||
|
// expects previous key-value, error if not exist
|
||||||
|
rr, err := rv.Range(context.TODO(), req.Key, nil, mvcc.RangeOptions{})
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if rr == nil || len(rr.KVs) == 0 {
|
||||||
|
return ErrKeyNotFound
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if lease.LeaseID(req.Lease) != lease.NoLease {
|
||||||
|
if l := lessor.Lookup(lease.LeaseID(req.Lease)); l == nil {
|
||||||
|
return lease.ErrLeaseNotFound
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func checkRequestRange(rv mvcc.ReadView, reqOp *pb.RequestOp) error {
|
||||||
|
tv, ok := reqOp.Request.(*pb.RequestOp_RequestRange)
|
||||||
|
if !ok || tv.RequestRange == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
req := tv.RequestRange
|
||||||
|
switch {
|
||||||
|
case req.Revision == 0:
|
||||||
|
return nil
|
||||||
|
case req.Revision > rv.Rev():
|
||||||
|
return mvcc.ErrFutureRev
|
||||||
|
case req.Revision < rv.FirstRev():
|
||||||
|
return mvcc.ErrCompacted
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func checkRequests(rv mvcc.ReadView, rt *pb.TxnRequest, txnPath []bool, f checkReqFunc) (int, error) {
|
||||||
|
txnCount := 0
|
||||||
|
reqs := rt.Success
|
||||||
|
if !txnPath[0] {
|
||||||
|
reqs = rt.Failure
|
||||||
|
}
|
||||||
|
for _, req := range reqs {
|
||||||
|
if tv, ok := req.Request.(*pb.RequestOp_RequestTxn); ok && tv.RequestTxn != nil {
|
||||||
|
txns, err := checkRequests(rv, tv.RequestTxn, txnPath[1:], f)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
txnCount += txns + 1
|
||||||
|
txnPath = txnPath[txns+1:]
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err := f(rv, req); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return txnCount, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// mkGteRange determines if the range end is a >= range. This works around grpc
|
||||||
|
// sending empty byte strings as nil; >= is encoded in the range end as '\0'.
|
||||||
|
// If it is a GTE range, then []byte{} is returned to indicate the empty byte
|
||||||
|
// string (vs nil being no byte string).
|
||||||
|
func mkGteRange(rangeEnd []byte) []byte {
|
||||||
|
if len(rangeEnd) == 1 && rangeEnd[0] == 0 {
|
||||||
|
return []byte{}
|
||||||
|
}
|
||||||
|
return rangeEnd
|
||||||
|
}
|
||||||
|
|
||||||
|
func pruneKVs(rr *mvcc.RangeResult, isPrunable func(*mvccpb.KeyValue) bool) {
|
||||||
|
j := 0
|
||||||
|
for i := range rr.KVs {
|
||||||
|
rr.KVs[j] = rr.KVs[i]
|
||||||
|
if !isPrunable(&rr.KVs[i]) {
|
||||||
|
j++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rr.KVs = rr.KVs[:j]
|
||||||
|
}
|
||||||
|
|
||||||
|
type kvSort struct{ kvs []mvccpb.KeyValue }
|
||||||
|
|
||||||
|
func (s *kvSort) Swap(i, j int) {
|
||||||
|
t := s.kvs[i]
|
||||||
|
s.kvs[i] = s.kvs[j]
|
||||||
|
s.kvs[j] = t
|
||||||
|
}
|
||||||
|
func (s *kvSort) Len() int { return len(s.kvs) }
|
||||||
|
|
||||||
|
type kvSortByKey struct{ *kvSort }
|
||||||
|
|
||||||
|
func (s *kvSortByKey) Less(i, j int) bool {
|
||||||
|
return bytes.Compare(s.kvs[i].Key, s.kvs[j].Key) < 0
|
||||||
|
}
|
||||||
|
|
||||||
|
type kvSortByVersion struct{ *kvSort }
|
||||||
|
|
||||||
|
func (s *kvSortByVersion) Less(i, j int) bool {
|
||||||
|
return (s.kvs[i].Version - s.kvs[j].Version) < 0
|
||||||
|
}
|
||||||
|
|
||||||
|
type kvSortByCreate struct{ *kvSort }
|
||||||
|
|
||||||
|
func (s *kvSortByCreate) Less(i, j int) bool {
|
||||||
|
return (s.kvs[i].CreateRevision - s.kvs[j].CreateRevision) < 0
|
||||||
|
}
|
||||||
|
|
||||||
|
type kvSortByMod struct{ *kvSort }
|
||||||
|
|
||||||
|
func (s *kvSortByMod) Less(i, j int) bool {
|
||||||
|
return (s.kvs[i].ModRevision - s.kvs[j].ModRevision) < 0
|
||||||
|
}
|
||||||
|
|
||||||
|
type kvSortByValue struct{ *kvSort }
|
||||||
|
|
||||||
|
func (s *kvSortByValue) Less(i, j int) bool {
|
||||||
|
return bytes.Compare(s.kvs[i].Value, s.kvs[j].Value) < 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func compareInt64(a, b int64) int {
|
||||||
|
switch {
|
||||||
|
case a < b:
|
||||||
|
return -1
|
||||||
|
case a > b:
|
||||||
|
return 1
|
||||||
|
default:
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func compareToPath(rv mvcc.ReadView, rt *pb.TxnRequest) []bool {
|
||||||
|
txnPath := make([]bool, 1)
|
||||||
|
ops := rt.Success
|
||||||
|
if txnPath[0] = applyCompares(rv, rt.Compare); !txnPath[0] {
|
||||||
|
ops = rt.Failure
|
||||||
|
}
|
||||||
|
for _, op := range ops {
|
||||||
|
tv, ok := op.Request.(*pb.RequestOp_RequestTxn)
|
||||||
|
if !ok || tv.RequestTxn == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
txnPath = append(txnPath, compareToPath(rv, tv.RequestTxn)...)
|
||||||
|
}
|
||||||
|
return txnPath
|
||||||
|
}
|
||||||
|
|
||||||
|
func applyCompares(rv mvcc.ReadView, cmps []*pb.Compare) bool {
|
||||||
|
for _, c := range cmps {
|
||||||
|
if !applyCompare(rv, c) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// applyCompare applies the compare request.
|
||||||
|
// If the comparison succeeds, it returns true. Otherwise, returns false.
|
||||||
|
func applyCompare(rv mvcc.ReadView, c *pb.Compare) bool {
|
||||||
|
// TODO: possible optimizations
|
||||||
|
// * chunk reads for large ranges to conserve memory
|
||||||
|
// * rewrite rules for common patterns:
|
||||||
|
// ex. "[a, b) createrev > 0" => "limit 1 /\ kvs > 0"
|
||||||
|
// * caching
|
||||||
|
rr, err := rv.Range(context.TODO(), c.Key, mkGteRange(c.RangeEnd), mvcc.RangeOptions{})
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if len(rr.KVs) == 0 {
|
||||||
|
if c.Target == pb.Compare_VALUE {
|
||||||
|
// Always fail if comparing a value on a key/keys that doesn't exist;
|
||||||
|
// nil == empty string in grpc; no way to represent missing value
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return compareKV(c, mvccpb.KeyValue{})
|
||||||
|
}
|
||||||
|
for _, kv := range rr.KVs {
|
||||||
|
if !compareKV(c, kv) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
func compareKV(c *pb.Compare, ckv mvccpb.KeyValue) bool {
|
||||||
|
var result int
|
||||||
|
rev := int64(0)
|
||||||
|
switch c.Target {
|
||||||
|
case pb.Compare_VALUE:
|
||||||
|
v := []byte{}
|
||||||
|
if tv, _ := c.TargetUnion.(*pb.Compare_Value); tv != nil {
|
||||||
|
v = tv.Value
|
||||||
|
}
|
||||||
|
result = bytes.Compare(ckv.Value, v)
|
||||||
|
case pb.Compare_CREATE:
|
||||||
|
if tv, _ := c.TargetUnion.(*pb.Compare_CreateRevision); tv != nil {
|
||||||
|
rev = tv.CreateRevision
|
||||||
|
}
|
||||||
|
result = compareInt64(ckv.CreateRevision, rev)
|
||||||
|
case pb.Compare_MOD:
|
||||||
|
if tv, _ := c.TargetUnion.(*pb.Compare_ModRevision); tv != nil {
|
||||||
|
rev = tv.ModRevision
|
||||||
|
}
|
||||||
|
result = compareInt64(ckv.ModRevision, rev)
|
||||||
|
case pb.Compare_VERSION:
|
||||||
|
if tv, _ := c.TargetUnion.(*pb.Compare_Version); tv != nil {
|
||||||
|
rev = tv.Version
|
||||||
|
}
|
||||||
|
result = compareInt64(ckv.Version, rev)
|
||||||
|
case pb.Compare_LEASE:
|
||||||
|
if tv, _ := c.TargetUnion.(*pb.Compare_Lease); tv != nil {
|
||||||
|
rev = tv.Lease
|
||||||
|
}
|
||||||
|
result = compareInt64(ckv.Lease, rev)
|
||||||
|
}
|
||||||
|
switch c.Result {
|
||||||
|
case pb.Compare_EQUAL:
|
||||||
|
return result == 0
|
||||||
|
case pb.Compare_NOT_EQUAL:
|
||||||
|
return result != 0
|
||||||
|
case pb.Compare_GREATER:
|
||||||
|
return result > 0
|
||||||
|
case pb.Compare_LESS:
|
||||||
|
return result < 0
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
@ -128,7 +128,7 @@ func (s *EtcdServer) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeRe
|
|||||||
return s.authStore.IsRangePermitted(ai, r.Key, r.RangeEnd)
|
return s.authStore.IsRangePermitted(ai, r.Key, r.RangeEnd)
|
||||||
}
|
}
|
||||||
|
|
||||||
get := func() { resp, err = s.applyV3Base.Range(ctx, nil, r) }
|
get := func() { resp, err = Range(ctx, s.Logger(), s.KV(), nil, r) }
|
||||||
if serr := s.doSerialize(ctx, chk, get); serr != nil {
|
if serr := s.doSerialize(ctx, chk, get); serr != nil {
|
||||||
err = serr
|
err = serr
|
||||||
return nil, err
|
return nil, err
|
||||||
@ -178,7 +178,9 @@ func (s *EtcdServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse
|
|||||||
trace.LogIfLong(traceThreshold)
|
trace.LogIfLong(traceThreshold)
|
||||||
}(time.Now())
|
}(time.Now())
|
||||||
|
|
||||||
get := func() { resp, _, err = s.applyV3Base.Txn(ctx, r) }
|
get := func() {
|
||||||
|
resp, _, err = Txn(ctx, s.Logger(), r, s.Cfg.ExperimentalTxnModeWriteWithSharedBuffer, s.KV(), s.lessor)
|
||||||
|
}
|
||||||
if serr := s.doSerialize(ctx, chk, get); serr != nil {
|
if serr := s.doSerialize(ctx, chk, get); serr != nil {
|
||||||
return nil, serr
|
return nil, serr
|
||||||
}
|
}
|
||||||
@ -390,7 +392,8 @@ func (s *EtcdServer) LeaseTimeToLive(ctx context.Context, r *pb.LeaseTimeToLiveR
|
|||||||
return nil, ErrCanceled
|
return nil, ErrCanceled
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *EtcdServer) LeaseLeases(ctx context.Context, r *pb.LeaseLeasesRequest) (*pb.LeaseLeasesResponse, error) {
|
// LeaseLeases is really ListLeases !???
|
||||||
|
func (s *EtcdServer) LeaseLeases(_ context.Context, _ *pb.LeaseLeasesRequest) (*pb.LeaseLeasesResponse, error) {
|
||||||
ls := s.lessor.Leases()
|
ls := s.lessor.Leases()
|
||||||
lss := make([]*pb.LeaseStatus, len(ls))
|
lss := make([]*pb.LeaseStatus, len(ls))
|
||||||
for i := range ls {
|
for i := range ls {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user