contrib/mixin/mixin.libsonnet: Adjust gRPC failed requests

OK is not the only gRPC status code that indicates a non-failed request. The previous
`grpc_code!="OK"` matcher also counted context canceled, NotFound, and other non-error responses as failures.
This commit is contained in:
Lili Cosic 2021-06-21 11:42:08 +02:00
parent f992d697ed
commit f00231951d

View File

@ -88,7 +88,7 @@
{ {
alert: 'etcdHighNumberOfFailedGRPCRequests', alert: 'etcdHighNumberOfFailedGRPCRequests',
expr: ||| expr: |||
100 * sum(rate(grpc_server_handled_total{%(etcd_selector)s, grpc_code!="OK"}[5m])) without (grpc_type, grpc_code) 100 * sum(rate(grpc_server_handled_total{%(etcd_selector)s, grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code)
/ /
sum(rate(grpc_server_handled_total{%(etcd_selector)s}[5m])) without (grpc_type, grpc_code) sum(rate(grpc_server_handled_total{%(etcd_selector)s}[5m])) without (grpc_type, grpc_code)
> 1 > 1
@ -105,7 +105,7 @@
{ {
alert: 'etcdHighNumberOfFailedGRPCRequests', alert: 'etcdHighNumberOfFailedGRPCRequests',
expr: ||| expr: |||
100 * sum(rate(grpc_server_handled_total{%(etcd_selector)s, grpc_code!="OK"}[5m])) without (grpc_type, grpc_code) 100 * sum(rate(grpc_server_handled_total{%(etcd_selector)s, grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code)
/ /
sum(rate(grpc_server_handled_total{%(etcd_selector)s}[5m])) without (grpc_type, grpc_code) sum(rate(grpc_server_handled_total{%(etcd_selector)s}[5m])) without (grpc_type, grpc_code)
> 5 > 5
@ -369,7 +369,7 @@
step: 2, step: 2,
}, },
{ {
expr: 'sum(rate(grpc_server_handled_total{job="$cluster",grpc_type="unary",grpc_code!="OK"}[5m]))', expr: 'sum(rate(grpc_server_handled_total{job="$cluster",grpc_type="unary",grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m]))',
format: 'time_series', format: 'time_series',
intervalFactor: 2, intervalFactor: 2,
legendFormat: 'RPC Failed Rate', legendFormat: 'RPC Failed Rate',