Merge pull request #13671 from mrueg/mixin-generate-manifests

contrib/mixin: Generate rules, fix tests
This commit is contained in:
Marek Siarkowicz 2022-02-15 23:06:52 +01:00 committed by GitHub
commit e814f6f78a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 64 additions and 15 deletions

11
.github/workflows/contrib.yaml vendored Normal file
View File

@ -0,0 +1,11 @@
# CI workflow that installs the mixin toolchain and runs the contrib/mixin
# Prometheus rule tests on every push and pull request.
name: Test contrib/mixin
on: [push, pull_request]
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-go@v2
        with:
          # Quoted so the version is read as a string rather than a number.
          go-version: "1.17.6"
      # `tools` installs jsonnet, gojsontoyaml and promtool; `test` generates
      # the rules manifest and runs promtool against test.yaml.
      - run: make -C contrib/mixin tools test

3
.gitignore vendored
View File

@ -14,6 +14,7 @@
*.test
hack/tls-setup/certs
.idea
/contrib/mixin/manifests
/contrib/raftexample/raftexample
/contrib/raftexample/raftexample-*
/vendor
@ -22,4 +23,4 @@ hack/tls-setup/certs
*.bak
.gobincache/
/Documentation/dev-guide/api_reference_v3.md
/Documentation/dev-guide/api_concurrency_reference_v3.md
/Documentation/dev-guide/api_concurrency_reference_v3.md

23
contrib/mixin/Makefile Normal file
View File

@ -0,0 +1,23 @@
# Build and test helpers for the etcd monitoring mixin.
.PHONY: tools manifests test clean

# Delete a target whose recipe exits non-zero, so a truncated manifest is not
# mistaken for an up-to-date one on the next run.
.DELETE_ON_ERROR:

OS := linux
ARCH ?= amd64
PROMETHEUS_VERSION := 2.33.1

# Install jsonnet and gojsontoyaml with `go install`, and unpack promtool from
# the Prometheus release tarball into $(go env GOPATH)/bin.
tools:
	go install github.com/google/go-jsonnet/cmd/jsonnet@latest
	go install github.com/brancz/gojsontoyaml@latest
	wget -qO- "https://github.com/prometheus/prometheus/releases/download/v${PROMETHEUS_VERSION}/prometheus-${PROMETHEUS_VERSION}.${OS}-${ARCH}.tar.gz" |\
	tar xvz --strip-components=1 -C "$$(go env GOPATH)/bin" prometheus-${PROMETHEUS_VERSION}.${OS}-${ARCH}/promtool

# Convenience alias for the generated rules file.
manifests: manifests/etcd-prometheusRules.yaml

# Render the mixin's alerts to YAML. Depends on mixin.libsonnet so the rules
# are regenerated whenever the mixin source changes.
manifests/etcd-prometheusRules.yaml: mixin.libsonnet
	mkdir -p manifests
	jsonnet -e '(import "mixin.libsonnet").prometheusAlerts' | gojsontoyaml > $@

# Run the promtool unit tests in test.yaml against the generated rules.
test: manifests/etcd-prometheusRules.yaml
	promtool test rules test.yaml

# Remove generated manifests.
clean:
	rm -rf manifests/*.yaml

View File

@ -12,11 +12,15 @@ Instructions for use are the same as the [kubernetes-mixin](https://github.com/k
## Testing alerts
Make sure to have [jsonnet](https://jsonnet.org/) and [gojsontoyaml](https://github.com/brancz/gojsontoyaml) installed.
Make sure to have [jsonnet](https://jsonnet.org/) and [gojsontoyaml](https://github.com/brancz/gojsontoyaml) installed. You can fetch them via
```
make tools
```
First compile the mixin to a YAML file, which the promtool will read:
```
jsonnet -e '(import "mixin.libsonnet").prometheusAlerts' | gojsontoyaml > mixin.yaml
make manifests
```
Then run the unit test:

View File

@ -1,5 +1,5 @@
rule_files:
- mixin.yaml
- manifests/etcd-prometheusRules.yaml
evaluation_interval: 1m
@ -86,14 +86,15 @@ tests:
exp_annotations:
description: 'etcd cluster "etcd": members are down (1).'
summary: 'etcd cluster members are down.'
- interval: 1m
input_series:
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.0"}'
values: '0 0 2 0 0 1 0 0 0 0 0 0 0 0 0 0'
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.1"}'
values: '0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0'
values: '0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0'
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.2"}'
values: '0 0 0 0 0 0 0 0'
values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
alert_rule_test:
- eval_time: 10m
alertname: etcdHighNumberOfLeaderChanges
@ -111,25 +112,34 @@ tests:
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.1"}'
values: '0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0'
- series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.2"}'
values: '0 0 0 0 0 0 0 0'
values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
alert_rule_test:
- eval_time: 10m
alertname: etcdHighNumberOfLeaderChanges
exp_alerts:
- interval: 1m
input_series:
- series: '((etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100){job="etcd",instance="10.10.10.0"}'
values: '0 10 20 0 0 10 0 0 30 0 0 0 0 0 0 0'
- series: '((etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100){job="etcd",instance="10.10.10.1"}'
values: '0 0 10 0 20 0 0 0 0 0 0 0 0 0 0 0'
- series: '((etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100){job="etcd",instance="10.10.10.2"}'
values: '0 0 0 0 0 0 0 0'
- series: 'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.0"}'
values: '0 1 2 0 0 1 0 3 0 0 0 0 0 0 0 0'
- series: 'etcd_server_quota_backend_bytes{job="etcd",instance="10.10.10.0"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
- series: 'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.1"}'
values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
- series: 'etcd_server_quota_backend_bytes{job="etcd",instance="10.10.10.1"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
- series: 'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.2"}'
values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0'
- series: 'etcd_server_quota_backend_bytes{job="etcd",instance="10.10.10.2"}'
values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1'
alert_rule_test:
- eval_time: 10m
- eval_time: 11m
alertname: etcdExcessiveDatabaseGrowth
exp_alerts:
- exp_labels:
instance: '10.10.10.0'
job: etcd
severity: warning
exp_annotations:
message: 'etcd cluster "etcd": Observed surge in etcd writes leading to 50% increase in database size over the past four hours, please check as it might be disruptive.'
description: 'etcd cluster "etcd": Observed surge in etcd writes leading to 50% increase in database size over the past four hours on etcd instance 10.10.10.0, please check as it might be disruptive.'
summary: 'etcd cluster database growing very fast.'