Documentation/op-guide: Remove HTTP alerts

The HTTP metrics have been deprecated and removed. In other places all
the HTTP related alerts have already been removed, this is just a clean
up of the remaining places these were left.
This commit is contained in:
Frederic Branczyk
2018-07-04 09:04:47 +02:00
parent e4425ee79f
commit 67ecea9709

View File

@@ -79,35 +79,6 @@ ANNOTATIONS {
description = "on etcd instance {{ $labels.instance }} gRPC requests to {{ $labels.grpc_method }} are slow",
}
# HTTP requests alerts
# ====================
# alert if more than 1% of requests to an HTTP endpoint have failed within the last 5 minutes
ALERT HighNumberOfFailedHTTPRequests
IF 100 * (sum(rate(grpc_server_handled_total{grpc_code!="OK",job="etcd"}[5m])) BY (grpc_service, grpc_method)
/ sum(rate(grpc_server_handled_total{job="etcd"}[5m])) BY (grpc_service, grpc_method)) > 1
FOR 10m
LABELS {
severity = "warning"
}
ANNOTATIONS {
summary = "a high number of HTTP requests are failing",
description = "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}",
}
# alert if more than 5% of requests to an HTTP endpoint have failed within the last 5 minutes
ALERT HighNumberOfFailedHTTPRequests
IF 100 * (sum(rate(grpc_server_handled_total{grpc_code!="OK",job="etcd"}[5m])) BY (grpc_service, grpc_method)
/ sum(rate(grpc_server_handled_total{job="etcd"}[5m])) BY (grpc_service, grpc_method)) > 5
FOR 5m
LABELS {
severity = "critical"
}
ANNOTATIONS {
summary = "a high number of HTTP requests are failing",
description = "{{ $value }}% of requests for {{ $labels.method }} failed on etcd instance {{ $labels.instance }}",
}
# file descriptor alerts
# ======================