Mirror of https://github.com/etcd-io/etcd.git, synced 2024-09-27 06:25:44 +00:00

To improve debuggability of etcd v3, a gRPC interceptor was added to log info on incoming requests to the etcd server. The log output includes remote client info, request content (with the value field redacted), request handling latency, response size, etc. The dependencies on the zap logger and grpc_middleware were removed during backporting. A check was also added to the logging interceptor: if the debug level is disabled, logUnaryRequestStats() is skipped to avoid potential performance degradation. (PR #10021)
254 lines
7.2 KiB
Go
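The interceptors defined in this file are not used on their own; they are installed as server options when the etcd gRPC server is constructed. A minimal sketch of that wiring, assuming the standard grpc.UnaryInterceptor and grpc.StreamInterceptor server options (the helper name newGRPCServerSketch is illustrative; the actual wiring lives elsewhere in this package):

package v3rpc

import (
    "github.com/coreos/etcd/etcdserver"
    "google.golang.org/grpc"
)

// newGRPCServerSketch shows how the unary and stream interceptors defined
// in this file would typically be attached to a gRPC server.
func newGRPCServerSketch(s *etcdserver.EtcdServer) *grpc.Server {
    var opts []grpc.ServerOption
    opts = append(opts, grpc.UnaryInterceptor(newUnaryInterceptor(s)))
    opts = append(opts, grpc.StreamInterceptor(newStreamInterceptor(s)))
    return grpc.NewServer(opts...)
}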
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package v3rpc

import (
    "sync"
    "time"

    "github.com/coreos/etcd/etcdserver"
    "github.com/coreos/etcd/etcdserver/api"
    "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
    "github.com/coreos/etcd/pkg/types"
    "github.com/coreos/etcd/raft"

    pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
    "github.com/coreos/pkg/capnslog"
    prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
    "golang.org/x/net/context"
    "google.golang.org/grpc"
    "google.golang.org/grpc/metadata"
    "google.golang.org/grpc/peer"
)

const (
    maxNoLeaderCnt = 3
)

type streamsMap struct {
    mu      sync.Mutex
    streams map[grpc.ServerStream]struct{}
}

func newUnaryInterceptor(s *etcdserver.EtcdServer) grpc.UnaryServerInterceptor {
    return func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) {
        if !api.IsCapabilityEnabled(api.V3rpcCapability) {
            return nil, rpctypes.ErrGRPCNotCapable
        }

        md, ok := metadata.FromContext(ctx)
        if ok {
            if ks := md[rpctypes.MetadataRequireLeaderKey]; len(ks) > 0 && ks[0] == rpctypes.MetadataHasLeader {
                if s.Leader() == types.ID(raft.None) {
                    return nil, rpctypes.ErrGRPCNoLeader
                }
            }
        }

        // Interceptors are chained manually during backporting; for better readability, refer to PR #9990.
        return logUnaryInterceptor(ctx, req, info, handler)
    }
}

// logUnaryInterceptor is a gRPC server-side interceptor that prints info on incoming requests for debugging purposes.
func logUnaryInterceptor(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) {
    startTime := time.Now()
    // Interceptors are chained manually during backporting; for better readability, refer to PR #9990.
    resp, err := prometheus.UnaryServerInterceptor(ctx, req, info, handler)
    // Skip logUnaryRequestStats entirely when debug logging is disabled to avoid its overhead.
    if plog.LevelAt(capnslog.DEBUG) {
        defer logUnaryRequestStats(ctx, info, startTime, req, resp)
    }
    return resp, err
}

func logUnaryRequestStats(ctx context.Context, info *grpc.UnaryServerInfo, startTime time.Time, req interface{}, resp interface{}) {
    duration := time.Since(startTime)
    remote := "No remote client info."
    peerInfo, ok := peer.FromContext(ctx)
    if ok {
        remote = peerInfo.Addr.String()
    }
    var responseType string = info.FullMethod
    var reqCount, respCount int64
    var reqSize, respSize int
    var reqContent string
    switch _resp := resp.(type) {
    case *pb.RangeResponse:
        _req, ok := req.(*pb.RangeRequest)
        if ok {
            reqCount = 0
            reqSize = _req.Size()
            reqContent = _req.String()
        }
        if _resp != nil {
            respCount = _resp.GetCount()
            respSize = _resp.Size()
        }
    case *pb.PutResponse:
        _req, ok := req.(*pb.PutRequest)
        if ok {
            reqCount = 1
            reqSize = _req.Size()
            reqContent = pb.NewLoggablePutRequest(_req).String() // redact value field from request content, see PR #9821
        }
        if _resp != nil {
            respCount = 0
            respSize = _resp.Size()
        }
    case *pb.DeleteRangeResponse:
        _req, ok := req.(*pb.DeleteRangeRequest)
        if ok {
            reqCount = 0
            reqSize = _req.Size()
            reqContent = _req.String()
        }
        if _resp != nil {
            respCount = _resp.GetDeleted()
            respSize = _resp.Size()
        }
    case *pb.TxnResponse:
        _req, ok := req.(*pb.TxnRequest)
        if ok && _resp != nil {
            if _resp.GetSucceeded() { // determine the 'actual' count and size of request based on success or failure
                reqCount = int64(len(_req.GetSuccess()))
                reqSize = 0
                for _, r := range _req.GetSuccess() {
                    reqSize += r.Size()
                }
            } else {
                reqCount = int64(len(_req.GetFailure()))
                reqSize = 0
                for _, r := range _req.GetFailure() {
                    reqSize += r.Size()
                }
            }
            reqContent = pb.NewLoggableTxnRequest(_req).String() // redact value field from request content, see PR #9821
        }
        if _resp != nil {
            respCount = 0
            respSize = _resp.Size()
        }
    default:
        reqCount = -1
        reqSize = -1
        respCount = -1
        respSize = -1
    }

    logGenericRequestStats(startTime, duration, remote, responseType, reqCount, reqSize, respCount, respSize, reqContent)
}

func logGenericRequestStats(startTime time.Time, duration time.Duration, remote string, responseType string,
    reqCount int64, reqSize int, respCount int64, respSize int, reqContent string) {
    plog.Debugf("start time = %v, "+
        "time spent = %v, "+
        "remote = %s, "+
        "response type = %s, "+
        "request count = %d, "+
        "request size = %d, "+
        "response count = %d, "+
        "response size = %d, "+
        "request content = %s",
        startTime, duration, remote, responseType, reqCount, reqSize, respCount, respSize, reqContent,
    )
}

func newStreamInterceptor(s *etcdserver.EtcdServer) grpc.StreamServerInterceptor {
    smap := monitorLeader(s)

    return func(srv interface{}, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error {
        if !api.IsCapabilityEnabled(api.V3rpcCapability) {
            return rpctypes.ErrGRPCNotCapable
        }

        md, ok := metadata.FromContext(ss.Context())
        if ok {
            if ks := md[rpctypes.MetadataRequireLeaderKey]; len(ks) > 0 && ks[0] == rpctypes.MetadataHasLeader {
                if s.Leader() == types.ID(raft.None) {
                    return rpctypes.ErrGRPCNoLeader
                }

                cctx, cancel := context.WithCancel(ss.Context())
                ss = serverStreamWithCtx{ctx: cctx, cancel: &cancel, ServerStream: ss}

                smap.mu.Lock()
                smap.streams[ss] = struct{}{}
                smap.mu.Unlock()

                defer func() {
                    smap.mu.Lock()
                    delete(smap.streams, ss)
                    smap.mu.Unlock()
                    cancel()
                }()
            }
        }

        return prometheus.StreamServerInterceptor(srv, ss, info, handler)
    }
}

type serverStreamWithCtx struct {
    grpc.ServerStream
    ctx    context.Context
    cancel *context.CancelFunc
}

func (ssc serverStreamWithCtx) Context() context.Context { return ssc.ctx }

func monitorLeader(s *etcdserver.EtcdServer) *streamsMap {
    smap := &streamsMap{
        streams: make(map[grpc.ServerStream]struct{}),
    }

    go func() {
        election := time.Duration(s.Cfg.TickMs) * time.Duration(s.Cfg.ElectionTicks) * time.Millisecond
        noLeaderCnt := 0

        for {
            select {
            case <-s.StopNotify():
                return
            case <-time.After(election):
                if s.Leader() == types.ID(raft.None) {
                    noLeaderCnt++
                } else {
                    noLeaderCnt = 0
                }

                // We are more conservative on canceling existing streams. Reconnecting streams
                // cost much more than just rejecting new requests. So we wait until the member
                // cannot find a leader for maxNoLeaderCnt election timeouts to cancel existing streams.
                if noLeaderCnt >= maxNoLeaderCnt {
                    smap.mu.Lock()
                    for ss := range smap.streams {
                        if ssWithCtx, ok := ss.(serverStreamWithCtx); ok {
                            (*ssWithCtx.cancel)()
                            <-ss.Context().Done()
                        }
                    }
                    smap.streams = make(map[grpc.ServerStream]struct{})
                    smap.mu.Unlock()
                }
            }
        }
    }()

    return smap
}
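The leader checks in both interceptors only apply to requests whose gRPC metadata carries rpctypes.MetadataRequireLeaderKey set to rpctypes.MetadataHasLeader. A minimal client-side sketch of triggering that path, assuming the clientv3 package's WithRequireLeader helper and a locally reachable endpoint (both are assumptions, not part of this file):

package main

import (
    "context"
    "log"
    "time"

    "github.com/coreos/etcd/clientv3"
)

func main() {
    cli, err := clientv3.New(clientv3.Config{
        Endpoints:   []string{"127.0.0.1:2379"}, // assumed local endpoint
        DialTimeout: 5 * time.Second,
    })
    if err != nil {
        log.Fatal(err)
    }
    defer cli.Close()

    // WithRequireLeader attaches the require-leader metadata, so a member
    // without a leader rejects the request (ErrGRPCNoLeader) instead of
    // serving it.
    ctx := clientv3.WithRequireLeader(context.Background())
    if _, err := cli.Get(ctx, "foo"); err != nil {
        log.Printf("range failed: %v", err)
    }
}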