mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
105 lines
5.1 KiB
Jsonnet
105 lines
5.1 KiB
Jsonnet
local g = import './g.libsonnet';
|
|
local prometheusQuery = g.query.prometheus;
|
|
|
|
function(variables, config) {
|
|
up:
|
|
prometheusQuery.new(
|
|
'$' + variables.datasource.name,
|
|
'sum(etcd_server_has_leader{%s, %s="$cluster"})' % [config.etcd_selector, config.clusterLabel]
|
|
)
|
|
+ prometheusQuery.withLegendFormat(|||
|
|
{{cluster}} - {{namespace}}
|
|
|||),
|
|
|
|
rpcRate:
|
|
prometheusQuery.new(
|
|
'$' + variables.datasource.name,
|
|
'sum(rate(grpc_server_started_total{%s, %s="$cluster",grpc_type="unary"}[$__rate_interval]))' % [config.etcd_selector, config.clusterLabel]
|
|
)
|
|
+ prometheusQuery.withLegendFormat('RPC rate'),
|
|
rpcFailedRate:
|
|
prometheusQuery.new(
|
|
'$' + variables.datasource.name,
|
|
'sum(rate(grpc_server_handled_total{%s, %s="$cluster",grpc_type="unary",grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[$__rate_interval]))' % [config.etcd_selector, config.clusterLabel]
|
|
)
|
|
+ prometheusQuery.withLegendFormat('RPC failed rate'),
|
|
watchStreams:
|
|
prometheusQuery.new(
|
|
'$' + variables.datasource.name,
|
|
'sum(grpc_server_started_total{%(etcd_selector)s,%(clusterLabel)s="$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{%(clusterLabel)s="$cluster",grpc_service="etcdserverpb.Watch",grpc_type="bidi_stream"})' % config
|
|
)
|
|
+ prometheusQuery.withLegendFormat('Watch streams'),
|
|
leaseStreams:
|
|
prometheusQuery.new(
|
|
'$' + variables.datasource.name,
|
|
'sum(grpc_server_started_total{%(etcd_selector)s,%(clusterLabel)s="$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"}) - sum(grpc_server_handled_total{%(clusterLabel)s="$cluster",grpc_service="etcdserverpb.Lease",grpc_type="bidi_stream"})' % config
|
|
)
|
|
+ prometheusQuery.withLegendFormat('Lease streams'),
|
|
dbSize:
|
|
prometheusQuery.new(
|
|
'$' + variables.datasource.name,
|
|
'etcd_mvcc_db_total_size_in_bytes{%s, %s="$cluster"}' % [config.etcd_selector, config.clusterLabel],
|
|
)
|
|
+ prometheusQuery.withLegendFormat('{{instance}} DB size'),
|
|
walFsync:
|
|
prometheusQuery.new(
|
|
'$' + variables.datasource.name,
|
|
'histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket{%s, %s="$cluster"}[$__rate_interval])) by (instance, le))' % [config.etcd_selector, config.clusterLabel],
|
|
)
|
|
+ prometheusQuery.withLegendFormat('{{instance}} WAL fsync'),
|
|
dbFsync:
|
|
prometheusQuery.new(
|
|
'$' + variables.datasource.name,
|
|
'histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket{%s, %s="$cluster"}[$__rate_interval])) by (instance, le))' % [config.etcd_selector, config.clusterLabel],
|
|
)
|
|
+ prometheusQuery.withLegendFormat('{{instance}} DB fsync'),
|
|
memory:
|
|
prometheusQuery.new(
|
|
'$' + variables.datasource.name,
|
|
'process_resident_memory_bytes{%s, %s="$cluster"}' % [config.etcd_selector, config.clusterLabel],
|
|
)
|
|
+ prometheusQuery.withLegendFormat('{{instance}} resident memory'),
|
|
clientTrafficIn:
|
|
prometheusQuery.new(
|
|
'$' + variables.datasource.name,
|
|
'rate(etcd_network_client_grpc_received_bytes_total{%s, %s="$cluster"}[$__rate_interval])' % [config.etcd_selector, config.clusterLabel],
|
|
)
|
|
+ prometheusQuery.withLegendFormat('{{instance}} client traffic in'),
|
|
clientTrafficOut:
|
|
prometheusQuery.new(
|
|
'$' + variables.datasource.name,
|
|
'rate(etcd_network_client_grpc_sent_bytes_total{%s, %s="$cluster"}[$__rate_interval])' % [config.etcd_selector, config.clusterLabel],
|
|
)
|
|
+ prometheusQuery.withLegendFormat('{{instance}} client traffic out'),
|
|
peerTrafficIn:
|
|
prometheusQuery.new(
|
|
'$' + variables.datasource.name,
|
|
'sum(rate(etcd_network_peer_received_bytes_total{%s, %s="$cluster"}[$__rate_interval])) by (instance)' % [config.etcd_selector, config.clusterLabel],
|
|
)
|
|
+ prometheusQuery.withLegendFormat('{{instance}} peer traffic in'),
|
|
peerTrafficOut:
|
|
prometheusQuery.new(
|
|
'$' + variables.datasource.name,
|
|
'sum(rate(etcd_network_peer_sent_bytes_total{%s, %s="$cluster"}[$__rate_interval])) by (instance)' % [config.etcd_selector, config.clusterLabel],
|
|
)
|
|
+ prometheusQuery.withLegendFormat('{{instance}} peer traffic out'),
|
|
raftProposals:
|
|
prometheusQuery.new(
|
|
'$' + variables.datasource.name,
|
|
'changes(etcd_server_leader_changes_seen_total{%s, %s="$cluster"}[1d])' % [config.etcd_selector, config.clusterLabel],
|
|
)
|
|
+ prometheusQuery.withLegendFormat('{{instance}} total leader elections per day'),
|
|
leaderElections:
|
|
prometheusQuery.new(
|
|
'$' + variables.datasource.name,
|
|
'changes(etcd_server_leader_changes_seen_total{%s, %s="$cluster"}[1d])' % [config.etcd_selector, config.clusterLabel],
|
|
)
|
|
+ prometheusQuery.withLegendFormat('{{instance}} total leader elections per day'),
|
|
peerRtt:
|
|
prometheusQuery.new(
|
|
'$' + variables.datasource.name,
|
|
'histogram_quantile(0.99, sum by (instance, le) (rate(etcd_network_peer_round_trip_time_seconds_bucket{%s, %s="$cluster"}[$__rate_interval])))' % [config.etcd_selector, config.clusterLabel],
|
|
)
|
|
+ prometheusQuery.withLegendFormat('{{instance}} peer round trip time'),
|
|
}
|