From 72c33d8b05e06db16690e5a6aa51ed7b438268b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20R=C3=BCger?= Date: Sun, 6 Feb 2022 22:32:45 +0100 Subject: [PATCH] contrib/mixin: Generate rules, fix tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add Makefile * Make tests runnable * Add generated rule manifest file Signed-off-by: Manuel Rüger --- .github/workflows/contrib.yaml | 11 +++++++++++ .gitignore | 3 ++- contrib/mixin/Makefile | 23 +++++++++++++++++++++++ contrib/mixin/README.md | 8 ++++++-- contrib/mixin/test.yaml | 34 ++++++++++++++++++++++------------ 5 files changed, 64 insertions(+), 15 deletions(-) create mode 100644 .github/workflows/contrib.yaml create mode 100644 contrib/mixin/Makefile diff --git a/.github/workflows/contrib.yaml b/.github/workflows/contrib.yaml new file mode 100644 index 000000000..612580ce2 --- /dev/null +++ b/.github/workflows/contrib.yaml @@ -0,0 +1,11 @@ +name: Test contrib/mixin +on: [push, pull_request] +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-go@v2 + with: + go-version: "1.17.6" + - run: make -C contrib/mixin tools test diff --git a/.gitignore b/.gitignore index dbb48c6e1..ab1bbe4ce 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ *.test hack/tls-setup/certs .idea +/contrib/mixin/manifests /contrib/raftexample/raftexample /contrib/raftexample/raftexample-* /vendor @@ -22,4 +23,4 @@ hack/tls-setup/certs *.bak .gobincache/ /Documentation/dev-guide/api_reference_v3.md -/Documentation/dev-guide/api_concurrency_reference_v3.md \ No newline at end of file +/Documentation/dev-guide/api_concurrency_reference_v3.md diff --git a/contrib/mixin/Makefile b/contrib/mixin/Makefile new file mode 100644 index 000000000..843215b00 --- /dev/null +++ b/contrib/mixin/Makefile @@ -0,0 +1,23 @@ +.PHONY: tools manifests test clean + +OS := linux +ARCH ?= amd64 +PROMETHEUS_VERSION := 2.33.1 + +tools: + go install 
github.com/google/go-jsonnet/cmd/jsonnet@latest + go install github.com/brancz/gojsontoyaml@latest + wget -qO- "https://github.com/prometheus/prometheus/releases/download/v${PROMETHEUS_VERSION}/prometheus-${PROMETHEUS_VERSION}.${OS}-${ARCH}.tar.gz" |\ + tar xvz --strip-components=1 -C "$$(go env GOPATH)/bin" prometheus-${PROMETHEUS_VERSION}.${OS}-${ARCH}/promtool + +manifests: manifests/etcd-prometheusRules.yaml + +manifests/etcd-prometheusRules.yaml: + mkdir -p manifests + jsonnet -e '(import "mixin.libsonnet").prometheusAlerts' | gojsontoyaml > manifests/etcd-prometheusRules.yaml + +test: manifests/etcd-prometheusRules.yaml + promtool test rules test.yaml + +clean: + rm -rf manifests/*.yaml diff --git a/contrib/mixin/README.md b/contrib/mixin/README.md index 224066f45..2ec70004c 100644 --- a/contrib/mixin/README.md +++ b/contrib/mixin/README.md @@ -12,11 +12,15 @@ Instructions for use are the same as the [kubernetes-mixin](https://github.com/k ## Testing alerts -Make sure to have [jsonnet](https://jsonnet.org/) and [gojsontoyaml](https://github.com/brancz/gojsontoyaml) installed. +Make sure to have [jsonnet](https://jsonnet.org/) and [gojsontoyaml](https://github.com/brancz/gojsontoyaml) installed. You can fetch it via + +``` +make tools +``` First compile the mixin to a YAML file, which the promtool will read: ``` -jsonnet -e '(import "mixin.libsonnet").prometheusAlerts' | gojsontoyaml > mixin.yaml +make manifests ``` Then run the unit test: diff --git a/contrib/mixin/test.yaml b/contrib/mixin/test.yaml index 24162bd4d..e8f53f550 100644 --- a/contrib/mixin/test.yaml +++ b/contrib/mixin/test.yaml @@ -1,5 +1,5 @@ rule_files: - - mixin.yaml + - manifests/etcd-prometheusRules.yaml evaluation_interval: 1m @@ -86,14 +86,15 @@ tests: exp_annotations: description: 'etcd cluster "etcd": members are down (1).' summary: 'etcd cluster members are down.' 
+ - interval: 1m input_series: - series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.0"}' values: '0 0 2 0 0 1 0 0 0 0 0 0 0 0 0 0' - series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.1"}' - values: '0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0' + values: '0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0' - series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.2"}' - values: '0 0 0 0 0 0 0 0' + values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0' alert_rule_test: - eval_time: 10m alertname: etcdHighNumberOfLeaderChanges @@ -111,25 +112,34 @@ tests: - series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.1"}' values: '0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0' - series: 'etcd_server_leader_changes_seen_total{job="etcd",instance="10.10.10.2"}' - values: '0 0 0 0 0 0 0 0' + values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0' alert_rule_test: - eval_time: 10m alertname: etcdHighNumberOfLeaderChanges exp_alerts: + - interval: 1m input_series: - - series: '((etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100){job="etcd",instance="10.10.10.0"}' - values: '0 10 20 0 0 10 0 0 30 0 0 0 0 0 0 0' - - series: '((etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100){job="etcd",instance="10.10.10.1"}' - values: '0 0 10 0 20 0 0 0 0 0 0 0 0 0 0 0' - - series: '((etcd_mvcc_db_total_size_in_bytes/etcd_server_quota_backend_bytes)*100){job="etcd",instance="10.10.10.2"}' - values: '0 0 0 0 0 0 0 0' + - series: 'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.0"}' + values: '0 1 2 0 0 1 0 3 0 0 0 0 0 0 0 0' + - series: 'etcd_server_quota_backend_bytes{job="etcd",instance="10.10.10.0"}' + values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1' + - series: 'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.1"}' + values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0' + - series: 'etcd_server_quota_backend_bytes{job="etcd",instance="10.10.10.1"}' + values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1' + - series: 
'etcd_mvcc_db_total_size_in_bytes{job="etcd",instance="10.10.10.2"}' + values: '0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0' + - series: 'etcd_server_quota_backend_bytes{job="etcd",instance="10.10.10.2"}' + values: '1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1' alert_rule_test: - - eval_time: 10m + - eval_time: 11m alertname: etcdExcessiveDatabaseGrowth exp_alerts: - exp_labels: + instance: '10.10.10.0' job: etcd severity: warning exp_annotations: - message: 'etcd cluster "etcd": Observed surge in etcd writes leading to 50% increase in database size over the past four hours, please check as it might be disruptive.' + description: 'etcd cluster "etcd": Observed surge in etcd writes leading to 50% increase in database size over the past four hours on etcd instance 10.10.10.0, please check as it might be disruptive.' + summary: 'etcd cluster database growing very fast.'