Merge pull request #12080 from wking/raise-etcd-leader-changes-to-four

Documentation/etcd-mixin: Raise etcdHighNumberOfLeaderChanges threshold to 4
This commit is contained in:
Sam Batschelet 2020-07-08 08:37:50 -04:00 committed by GitHub
commit 429826b467
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 5 additions and 5 deletions

View File

@ -57,7 +57,7 @@
{
alert: 'etcdHighNumberOfLeaderChanges',
expr: |||
increase((max by (job) (etcd_server_leader_changes_seen_total{%(etcd_selector)s}) or 0*absent(etcd_server_leader_changes_seen_total{%(etcd_selector)s}))[15m:1m]) >= 3
increase((max by (job) (etcd_server_leader_changes_seen_total{%(etcd_selector)s}) or 0*absent(etcd_server_leader_changes_seen_total{%(etcd_selector)s}))[15m:1m]) >= 4
||| % $._config,
'for': '5m',
labels: {

View File

@ -99,7 +99,7 @@ tests:
job: etcd
severity: warning
exp_annotations:
message: 'etcd cluster "etcd": 3 leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.'
message: 'etcd cluster "etcd": 4 leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.'
- interval: 1m
input_series:
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.0"}'

View File

@ -29,13 +29,13 @@ ANNOTATIONS {
# alert if there are lots of leader changes
ALERT HighNumberOfLeaderChanges
IF increase(etcd_server_leader_changes_seen_total{job="etcd"}[1h]) > 3
IF increase((max by (job) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 4
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "a high number of leader changes within the etcd cluster are happening",
description = "etcd instance {{ $labels.instance }} has seen {{ $value }} leader changes within the last hour",
description = "etcd cluster "{{ $labels.job }}": {{ $value }} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.",
}
# gRPC request alerts

View File

@ -41,7 +41,7 @@ groups:
the last 15 minutes. Frequent elections may be a sign of insufficient resources,
high network latency, or disruptions by other components and should be investigated.'
expr: |
increase((max by (job) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 3
increase((max by (job) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 4
for: 5m
labels:
severity: warning