etcdserver: configure "expensive" requests duration

When a unary request takes more than predefined duration, this request
is defined as "expensive" and a warning is printed. The expensive request
duration is hard-coded to 300 ms. It can be not enough for example
for transactions with a lot of operations. The warnings just blow up
the log files and reduce throughput.

This fix allows user to configure the "expensive" request duration.

Signed-off-by: Alexey Roytman <roytman@il.ibm.com>
This commit is contained in:
Alexey Roytman 2021-07-19 14:01:40 +03:00
parent a1fd98c6b0
commit 2a26f7ae4c
No known key found for this signature in database
GPG Key ID: 4C2A6C2B769909A7
7 changed files with 28 additions and 17 deletions

View File

@ -121,7 +121,8 @@ type ServerConfig struct {
// MaxRequestBytes is the maximum request size to send over raft.
MaxRequestBytes uint
WarningApplyDuration time.Duration
WarningApplyDuration time.Duration
WarningUnaryRequestDuration time.Duration
StrictReconfigCheck bool

View File

@ -49,16 +49,17 @@ const (
ClusterStateFlagNew = "new"
ClusterStateFlagExisting = "existing"
DefaultName = "default"
DefaultMaxSnapshots = 5
DefaultMaxWALs = 5
DefaultMaxTxnOps = uint(128)
DefaultWarningApplyDuration = 100 * time.Millisecond
DefaultMaxRequestBytes = 1.5 * 1024 * 1024
DefaultGRPCKeepAliveMinTime = 5 * time.Second
DefaultGRPCKeepAliveInterval = 2 * time.Hour
DefaultGRPCKeepAliveTimeout = 20 * time.Second
DefaultDowngradeCheckTime = 5 * time.Second
DefaultName = "default"
DefaultMaxSnapshots = 5
DefaultMaxWALs = 5
DefaultMaxTxnOps = uint(128)
DefaultWarningApplyDuration = 100 * time.Millisecond
DefaultWarningUnaryRequestDuration = 300 * time.Millisecond
DefaultMaxRequestBytes = 1.5 * 1024 * 1024
DefaultGRPCKeepAliveMinTime = 5 * time.Second
DefaultGRPCKeepAliveInterval = 2 * time.Hour
DefaultGRPCKeepAliveTimeout = 20 * time.Second
DefaultDowngradeCheckTime = 5 * time.Second
DefaultListenPeerURLs = "http://localhost:2380"
DefaultListenClientURLs = "http://localhost:2379"
@ -326,6 +327,9 @@ type Config struct {
// ExperimentalBootstrapDefragThresholdMegabytes is the minimum number of megabytes needed to be freed for etcd server to
// consider running defrag during bootstrap. Needs to be set to non-zero value to take effect.
ExperimentalBootstrapDefragThresholdMegabytes uint `json:"experimental-bootstrap-defrag-threshold-megabytes"`
// ExperimentalWarningUnaryRequestDuration is the time duration after which a warning is generated if applying
// unary request takes more time than this value.
ExperimentalWarningUnaryRequestDuration time.Duration `json:"experimental-warning-unary-request-duration"`
// ForceNewCluster starts a new cluster even if previously started; unsafe.
ForceNewCluster bool `json:"force-new-cluster"`
@ -447,6 +451,8 @@ func NewConfig() *Config {
MaxRequestBytes: DefaultMaxRequestBytes,
ExperimentalWarningApplyDuration: DefaultWarningApplyDuration,
ExperimentalWarningUnaryRequestDuration: DefaultWarningUnaryRequestDuration,
GRPCKeepAliveMinTime: DefaultGRPCKeepAliveMinTime,
GRPCKeepAliveInterval: DefaultGRPCKeepAliveInterval,
GRPCKeepAliveTimeout: DefaultGRPCKeepAliveTimeout,

View File

@ -221,6 +221,7 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) {
WatchProgressNotifyInterval: cfg.ExperimentalWatchProgressNotifyInterval,
DowngradeCheckTime: cfg.ExperimentalDowngradeCheckTime,
WarningApplyDuration: cfg.ExperimentalWarningApplyDuration,
WarningUnaryRequestDuration: cfg.ExperimentalWarningUnaryRequestDuration,
ExperimentalMemoryMlock: cfg.ExperimentalMemoryMlock,
ExperimentalTxnModeWriteWithSharedBuffer: cfg.ExperimentalTxnModeWriteWithSharedBuffer,
ExperimentalBootstrapDefragThresholdMegabytes: cfg.ExperimentalBootstrapDefragThresholdMegabytes,

View File

@ -286,6 +286,7 @@ func newConfig() *config {
fs.DurationVar(&cfg.ec.ExperimentalWatchProgressNotifyInterval, "experimental-watch-progress-notify-interval", cfg.ec.ExperimentalWatchProgressNotifyInterval, "Duration of periodic watch progress notifications.")
fs.DurationVar(&cfg.ec.ExperimentalDowngradeCheckTime, "experimental-downgrade-check-time", cfg.ec.ExperimentalDowngradeCheckTime, "Duration of time between two downgrade status check.")
fs.DurationVar(&cfg.ec.ExperimentalWarningApplyDuration, "experimental-warning-apply-duration", cfg.ec.ExperimentalWarningApplyDuration, "Time duration after which a warning is generated if request takes more time.")
fs.DurationVar(&cfg.ec.ExperimentalWarningUnaryRequestDuration, "experimental-warning-unary-request-duration", cfg.ec.ExperimentalWarningUnaryRequestDuration, "Time duration after which a warning is generated if a unary request takes more time.")
fs.BoolVar(&cfg.ec.ExperimentalMemoryMlock, "experimental-memory-mlock", cfg.ec.ExperimentalMemoryMlock, "Enable to enforce etcd pages (in particular bbolt) to stay in RAM.")
fs.BoolVar(&cfg.ec.ExperimentalTxnModeWriteWithSharedBuffer, "experimental-txn-mode-write-with-shared-buffer", true, "Enable the write transaction to use a shared buffer in its readonly check operations.")
fs.UintVar(&cfg.ec.ExperimentalBootstrapDefragThresholdMegabytes, "experimental-bootstrap-defrag-threshold-megabytes", 0, "Enable the defrag during etcd server bootstrap on condition that it will free at least the provided threshold of disk space. Needs to be set to non-zero value to take effect.")

View File

@ -246,6 +246,8 @@ Experimental feature:
Enable the write transaction to use a shared buffer in its readonly check operations.
--experimental-bootstrap-defrag-threshold-megabytes
Enable the defrag during etcd server bootstrap on condition that it will free at least the provided threshold of disk space. Needs to be set to non-zero value to take effect.
--experimental-warning-unary-request-duration '300ms'
Set time duration after which a warning is generated if a unary request takes more than this duration.
Unsafe feature:
--force-new-cluster 'false'

View File

@ -33,9 +33,8 @@ import (
)
const (
maxNoLeaderCnt = 3
warnUnaryRequestLatency = 300 * time.Millisecond
snapshotMethod = "/etcdserverpb.Maintenance/Snapshot"
maxNoLeaderCnt = 3
snapshotMethod = "/etcdserverpb.Maintenance/Snapshot"
)
type streamsMap struct {
@ -78,19 +77,19 @@ func newLogUnaryInterceptor(s *etcdserver.EtcdServer) grpc.UnaryServerIntercepto
resp, err := handler(ctx, req)
lg := s.Logger()
if lg != nil { // acquire stats if debug level is enabled or request is expensive
defer logUnaryRequestStats(ctx, lg, info, startTime, req, resp)
defer logUnaryRequestStats(ctx, lg, s.Cfg.WarningUnaryRequestDuration, info, startTime, req, resp)
}
return resp, err
}
}
func logUnaryRequestStats(ctx context.Context, lg *zap.Logger, info *grpc.UnaryServerInfo, startTime time.Time, req interface{}, resp interface{}) {
func logUnaryRequestStats(ctx context.Context, lg *zap.Logger, warnLatency time.Duration, info *grpc.UnaryServerInfo, startTime time.Time, req interface{}, resp interface{}) {
duration := time.Since(startTime)
var enabledDebugLevel, expensiveRequest bool
if lg.Core().Enabled(zap.DebugLevel) {
enabledDebugLevel = true
}
if duration > warnUnaryRequestLatency {
if duration > warnLatency {
expensiveRequest = true
}
if !enabledDebugLevel && !expensiveRequest {

View File

@ -705,6 +705,7 @@ func mustNewMember(t testutil.TB, mcfg memberConfig) *member {
m.InitialCorruptCheck = true
m.WarningApplyDuration = embed.DefaultWarningApplyDuration
m.WarningUnaryRequestDuration = embed.DefaultWarningUnaryRequestDuration
m.V2Deprecation = config.V2_DEPR_DEFAULT