From 7de89e8db604ecb8253f36ea01636149be8605bb Mon Sep 17 00:00:00 2001 From: Mustafa Elbehery Date: Wed, 13 Jul 2022 19:54:09 +0200 Subject: [PATCH] etcd-mixin: add etcdDatabaseHighFragmentationRatio alert Signed-off-by: Mustafa Elbehery --- contrib/mixin/mixin.libsonnet | 15 +++++++++++++++ contrib/mixin/test.yaml | 23 +++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/contrib/mixin/mixin.libsonnet b/contrib/mixin/mixin.libsonnet index 4311a4b9d..83283f7d5 100644 --- a/contrib/mixin/mixin.libsonnet +++ b/contrib/mixin/mixin.libsonnet @@ -238,6 +238,21 @@ summary: 'etcd cluster database growing very fast.', }, }, + { + alert: 'etcdDatabaseHighFragmentationRatio', + expr: ||| + (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes[5m]) / last_over_time(etcd_mvcc_db_total_size_in_bytes[5m])) < 0.5 + ||| % $._config, + 'for': '10m', + labels: { + severity: 'warning', + }, + annotations: { + description: 'etcd cluster "{{ $labels.%s }}": database size in use on instance {{ $labels.instance }} is {{ $value | humanizePercentage }} of the actual allocated disk space, please run defragmentation (e.g. etcdctl defrag) to retrieve the unused fragmented disk space.' % $._config.clusterLabel, + summary: 'etcd database size in use is less than 50% of the actual allocated storage.', + runbook_url: 'https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation', + }, + }, ], }, ], diff --git a/contrib/mixin/test.yaml b/contrib/mixin/test.yaml index 3139946af..8cf18a0e0 100644 --- a/contrib/mixin/test.yaml +++ b/contrib/mixin/test.yaml @@ -139,3 +139,26 @@ tests: exp_annotations: description: 'etcd cluster "etcd": Predicting running out of disk space in the next four hours, based on write observations within the past four hours on etcd instance 10.10.10.0, please check as it might be disruptive.' summary: 'etcd cluster database growing very fast.' 
+ + - interval: 1m + input_series: + - series: 'etcd_mvcc_db_total_size_in_use_in_bytes{job="etcd",instance="10.10.10.0"}' + values: '30000+0x10' + - series: 'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.0"}' + values: '100000+0x10' + - series: 'etcd_mvcc_db_total_size_in_use_in_bytes{job="etcd",instance="10.10.10.1"}' + values: '70000+0x10' + - series: 'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.1"}' + values: '100000+0x10' + alert_rule_test: + - eval_time: 11m + alertname: etcdDatabaseHighFragmentationRatio + exp_alerts: + - exp_labels: + instance: '10.10.10.0' + job: etcd + severity: warning + exp_annotations: + description: 'etcd cluster "etcd": database size in use on instance 10.10.10.0 is 30% of the actual allocated disk space, please run defragmentation (e.g. etcdctl defrag) to retrieve the unused fragmented disk space.' + runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation + summary: 'etcd database size in use is less than 50% of the actual allocated storage.'