etcd/contrib/mixin/dashboards/targets.libsonnet
Vitaly Zhuravlev aa21d536d6 Keep grafana7x dashboard. Enable with grafana7x=true in config
Signed-off-by: Vitaly Zhuravlev <v-zhuravlev@users.noreply.github.com>
2023-08-28 13:42:02 +00:00

105 lines
5.1 KiB
Jsonnet

local g = import './g.libsonnet';
local prometheusQuery = g.query.prometheus;
// Builds the Prometheus query targets consumed by the etcd dashboard.
//
// Parameters:
//   variables - object exposing `datasource.name`; every target references
//               the datasource as '$' + that name.
//   config    - object exposing `etcd_selector` (label matchers interpolated
//               verbatim into each query) and `clusterLabel` (label name
//               matched against the dashboard's `$cluster` variable).
//
// Returns an object with one field per target. Each target is created with
// `prometheusQuery.new` and given a legend via `prometheusQuery.withLegendFormat`.
function(variables, config) {
  // Datasource reference shared by every target.
  local datasource = '$' + variables.datasource.name,

  // Label matchers common to every query. Kept in one place so that all
  // operands of a query are always restricted to the same series.
  local selector = '%s, %s="$cluster"' % [config.etcd_selector, config.clusterLabel],

  // Builds a single target from a PromQL expression and a legend format.
  local target(expr, legend) =
    prometheusQuery.new(datasource, expr)
    + prometheusQuery.withLegendFormat(legend),

  // Expression for "started minus handled" bidirectional streams of one gRPC
  // service. Both operands use the same selector; the original filtered only
  // the `started` side by `etcd_selector`, so `handled` series from any other
  // job in the cluster were subtracted as well.
  local openStreams(service) =
    'sum(grpc_server_started_total{%(sel)s,grpc_service="%(svc)s",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{%(sel)s,grpc_service="%(svc)s",grpc_type="bidi_stream"})' % { sel: selector, svc: service },

  up:
    target(
      'sum(etcd_server_has_leader{%s})' % [selector],
      // Plain string: the previous text block appended a stray newline.
      '{{cluster}} - {{namespace}}'
    ),

  rpcRate:
    target(
      'sum(rate(grpc_server_started_total{%s,grpc_type="unary"}[$__rate_interval]))' % [selector],
      'RPC rate'
    ),

  rpcFailedRate:
    target(
      'sum(rate(grpc_server_handled_total{%s,grpc_type="unary",grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[$__rate_interval]))' % [selector],
      'RPC failed rate'
    ),

  watchStreams:
    target(openStreams('etcdserverpb.Watch'), 'Watch streams'),

  leaseStreams:
    target(openStreams('etcdserverpb.Lease'), 'Lease streams'),

  dbSize:
    target(
      'etcd_mvcc_db_total_size_in_bytes{%s}' % [selector],
      '{{instance}} DB size'
    ),

  walFsync:
    target(
      'histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket{%s}[$__rate_interval])) by (instance, le))' % [selector],
      '{{instance}} WAL fsync'
    ),

  dbFsync:
    target(
      'histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket{%s}[$__rate_interval])) by (instance, le))' % [selector],
      '{{instance}} DB fsync'
    ),

  memory:
    target(
      'process_resident_memory_bytes{%s}' % [selector],
      '{{instance}} resident memory'
    ),

  clientTrafficIn:
    target(
      'rate(etcd_network_client_grpc_received_bytes_total{%s}[$__rate_interval])' % [selector],
      '{{instance}} client traffic in'
    ),

  clientTrafficOut:
    target(
      'rate(etcd_network_client_grpc_sent_bytes_total{%s}[$__rate_interval])' % [selector],
      '{{instance}} client traffic out'
    ),

  peerTrafficIn:
    target(
      'sum(rate(etcd_network_peer_received_bytes_total{%s}[$__rate_interval])) by (instance)' % [selector],
      '{{instance}} peer traffic in'
    ),

  peerTrafficOut:
    target(
      'sum(rate(etcd_network_peer_sent_bytes_total{%s}[$__rate_interval])) by (instance)' % [selector],
      '{{instance}} peer traffic out'
    ),

  // NOTE(review): this query and legend are identical to `leaderElections`
  // below and do not reference any proposal metric, which looks like a
  // copy-paste. Left unchanged because the intended proposal queries and the
  // panel that consumes this field are not visible here - TODO confirm and
  // replace with the proposal metric(s) the panel should show.
  raftProposals:
    target(
      'changes(etcd_server_leader_changes_seen_total{%s}[1d])' % [selector],
      '{{instance}} total leader elections per day'
    ),

  leaderElections:
    target(
      'changes(etcd_server_leader_changes_seen_total{%s}[1d])' % [selector],
      '{{instance}} total leader elections per day'
    ),

  peerRtt:
    target(
      'histogram_quantile(0.99, sum by (instance, le) (rate(etcd_network_peer_round_trip_time_seconds_bucket{%s}[$__rate_interval])))' % [selector],
      '{{instance}} peer round trip time'
    ),
}