etcd mixin: rework the etcdExcessiveDatabaseGrowth alert.

The rule no longer alerts on a 50% rise of the db-size/quota ratio over 240m.
It now alerts when predict_linear over the last 4h projects
etcd_mvcc_db_total_size_in_bytes above etcd_server_quota_backend_bytes four
hours ahead. test.yaml switches the fixture to linearly growing series, drops
the 10.10.10.2 instance, and updates the expected description to match.

NOTE(review): this patch was recovered from a line-collapsed copy; the leading
whitespace of the hunk lines is reconstructed from nesting. Confirm against
the target files, or apply with `git apply --ignore-whitespace`.

diff --git a/contrib/mixin/mixin.libsonnet b/contrib/mixin/mixin.libsonnet
index 27e266545..4311a4b9d 100644
--- a/contrib/mixin/mixin.libsonnet
+++ b/contrib/mixin/mixin.libsonnet
@@ -227,14 +227,14 @@
           {
             alert: 'etcdExcessiveDatabaseGrowth',
             expr: |||
-              increase(((etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100)[240m:1m]) > 50
+              predict_linear(etcd_mvcc_db_total_size_in_bytes[4h], 4*60*60) > etcd_server_quota_backend_bytes
             ||| % $._config,
             'for': '10m',
             labels: {
               severity: 'warning',
             },
             annotations: {
-              description: 'etcd cluster "{{ $labels.%s }}": Observed surge in etcd writes leading to 50%% increase in database size over the past four hours on etcd instance {{ $labels.instance }}, please check as it might be disruptive.' % $._config.clusterLabel,
+              description: 'etcd cluster "{{ $labels.%s }}": Predicting running out of disk space in the next four hours, based on write observations within the past four hours on etcd instance {{ $labels.instance }}, please check as it might be disruptive.' % $._config.clusterLabel,
               summary: 'etcd cluster database growing very fast.',
             },
           },
diff --git a/contrib/mixin/test.yaml b/contrib/mixin/test.yaml
index e8f53f550..3139946af 100644
--- a/contrib/mixin/test.yaml
+++ b/contrib/mixin/test.yaml
@@ -121,17 +121,13 @@ tests:
   - interval: 1m
     input_series:
       - series: 'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.0"}'
-        values: '0 1 2 0 0 1 0 3 0 0 0 0 0 0 0 0'
+        values: '0+8192x240'
       - series: 'etcd_server_quota_backend_bytes{job="etcd",instance="10.10.10.0"}'
-        values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
+        values: '524288+0x240'
       - series: 'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.1"}'
-        values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
+        values: '0+1024x240'
       - series: 'etcd_server_quota_backend_bytes{job="etcd",instance="10.10.10.1"}'
-        values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
-      - series: 'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.2"}'
-        values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
-      - series: 'etcd_server_quota_backend_bytes{job="etcd",instance="10.10.10.2"}'
-        values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
+        values: '524288+0x240'
     alert_rule_test:
       - eval_time: 11m
         alertname: etcdExcessiveDatabaseGrowth
@@ -141,5 +137,5 @@ tests:
               job: etcd
               severity: warning
             exp_annotations:
-              description: 'etcd cluster "etcd": Observed surge in etcd writes leading to 50% increase in database size over the past four hours on etcd instance 10.10.10.0, please check as it might be disruptive.'
+              description: 'etcd cluster "etcd": Predicting running out of disk space in the next four hours, based on write observations within the past four hours on etcd instance 10.10.10.0, please check as it might be disruptive.'
               summary: 'etcd cluster database growing very fast.'