etcd-mixin: add etcdDatabaseHighFragmentationRatio alert

Signed-off-by: Mustafa Elbehery <melbeher@redhat.com>
This commit is contained in:
Mustafa Elbehery 2022-07-13 19:54:09 +02:00
parent 525d53bd41
commit 7de89e8db6
2 changed files with 38 additions and 0 deletions

View File

@ -238,6 +238,21 @@
summary: 'etcd cluster database growing very fast.',
},
},
{
  // Warns when less than half of the disk space allocated to the etcd
  // backend database is actually in use, i.e. the database file is heavily
  // fragmented and a defragmentation would reclaim space.
  alert: 'etcdDatabaseHighFragmentationRatio',
  // Ratio of in-use bytes to allocated bytes, each taken as the most recent
  // sample within the last 5 minutes. Both metrics are scoped with the
  // mixin's etcd selector so that the `% $._config` formatting below has a
  // placeholder to fill and series from unrelated jobs are not evaluated.
  // NOTE(review): relies on `etcd_selector` being defined in `$._config` —
  // confirm against the mixin's config before merging.
  expr: |||
    (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{%(etcd_selector)s}[5m]) / last_over_time(etcd_mvcc_db_total_size_in_bytes{%(etcd_selector)s}[5m])) < 0.5
  ||| % $._config,
  // The ratio has to stay below the threshold for 10 minutes before the
  // alert fires.
  'for': '10m',
  labels: {
    severity: 'warning',
  },
  annotations: {
    // `%s` is replaced with the configured cluster label name, so the
    // rendered template reads that label from the alert. `$value` is the
    // ratio computed by `expr`, rendered as a percentage.
    description: 'etcd cluster "{{ $labels.%s }}": database size in use on instance {{ $labels.instance }} is {{ $value | humanizePercentage }} of the actual allocated disk space, please run defragmentation (e.g. etcdctl defrag) to retrieve the unused fragmented disk space.' % $._config.clusterLabel,
    summary: 'etcd database size in use is less than 50% of the actual allocated storage.',
    runbook_url: 'https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation',
  },
},
],
},
],

View File

@ -139,3 +139,26 @@ tests:
exp_annotations:
description: 'etcd cluster "etcd": Predicting running out of disk space in the next four hours, based on write observations within the past four hours on etcd instance 10.10.10.0, please check as it might be disruptive.'
summary: 'etcd cluster database growing very fast.'
# Unit test for the etcdDatabaseHighFragmentationRatio alert.
# Two instances report a constant allocated size of 100000 bytes:
#   10.10.10.0 has 30000 bytes in use (30%, below the 50% threshold),
#   10.10.10.1 has 70000 bytes in use (70%, above the 50% threshold).
# Only 10.10.10.0 is expected to alert.
- interval: 1m
  input_series:
    - series: 'etcd_mvcc_db_total_size_in_use_in_bytes{job="etcd",instance="10.10.10.0"}'
      values: '30000+0x10'
    - series: 'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.0"}'
      values: '100000+0x10'
    - series: 'etcd_mvcc_db_total_size_in_use_in_bytes{job="etcd",instance="10.10.10.1"}'
      values: '70000+0x10'
    - series: 'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.1"}'
      values: '100000+0x10'
  alert_rule_test:
    # Before the rule's 10m `for` duration has elapsed the alert is only
    # pending, so no firing alert is expected yet.
    - eval_time: 5m
      alertname: etcdDatabaseHighFragmentationRatio
      exp_alerts: []
    # After the `for` duration only the fragmented instance fires; the
    # instance at 70% usage must not appear.
    - eval_time: 11m
      alertname: etcdDatabaseHighFragmentationRatio
      exp_alerts:
        - exp_labels:
            instance: '10.10.10.0'
            job: etcd
            severity: warning
          exp_annotations:
            description: 'etcd cluster "etcd": database size in use on instance 10.10.10.0 is 30% of the actual allocated disk space, please run defragmentation (e.g. etcdctl defrag) to retrieve the unused fragmented disk space.'
            runbook_url: 'https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation'
            summary: 'etcd database size in use is less than 50% of the actual allocated storage.'