Cancel watch if cluster not healthy before or after injecting failpoints.
Signed-off-by: James Blair <mail@jamesblair.net>
commit 1227754284
parent 05e2910f15
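The hunks below only add health checks and early returns inside triggerFailpoints; the watch cancellation named in the commit title is the knock-on effect of those returns in a test that shares one cancellable context between the failpoint routine and its watch. A minimal sketch of that assumed wiring; the names watchUntilDone and runFailpoints are hypothetical and not part of this commit:

import (
	"context"

	clientv3 "go.etcd.io/etcd/client/v3"
)

// watchUntilDone illustrates the assumed wiring: the failpoint goroutine
// cancels the shared context when it returns, which tears down the watch.
func watchUntilDone(client *clientv3.Client, runFailpoints func(context.Context)) {
	ctx, cancel := context.WithCancel(context.Background())
	go func() {
		// An early return (for example, on a failed health check) still
		// runs the deferred cancel, so the watch below stops promptly.
		defer cancel()
		runFailpoints(ctx)
	}()
	// clientv3 closes the watch channel once ctx is cancelled, ending the loop.
	for resp := range client.Watch(ctx, "", clientv3.WithPrefix()) {
		_ = resp.Events
	}
}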
@@ -23,6 +23,7 @@ import (
 	"time"
 
 	"go.uber.org/zap"
+	healthpb "google.golang.org/grpc/health/grpc_health_v1"
 
 	clientv3 "go.etcd.io/etcd/client/v3"
 	"go.etcd.io/etcd/tests/v3/framework/e2e"
@@ -98,18 +99,62 @@ func triggerFailpoints(ctx context.Context, t *testing.T, lg *zap.Logger, clus *
 	}
 	for successes < config.count && failures < config.retries {
 		time.Sleep(config.waitBetweenTriggers)
-		lg.Info("Triggering failpoint\n", zap.String("failpoint", config.failpoint.Name()))
+
+		lg.Info("Verifying cluster health before failpoint", zap.String("failpoint", config.failpoint.Name()))
+		if err = verifyClusterHealth(ctx, t, clus); err != nil {
+			t.Errorf("failed to verify cluster health before failpoint injection, err: %v", err)
+			return
+		}
+
+		lg.Info("Triggering failpoint", zap.String("failpoint", config.failpoint.Name()))
 		err = config.failpoint.Trigger(ctx, t, lg, clus)
 		if err != nil {
 			lg.Info("Failed to trigger failpoint", zap.String("failpoint", config.failpoint.Name()), zap.Error(err))
 			failures++
 			continue
 		}
+
+		lg.Info("Verifying cluster health after failpoint", zap.String("failpoint", config.failpoint.Name()))
+		if err = verifyClusterHealth(ctx, t, clus); err != nil {
+			t.Errorf("failed to verify cluster health after failpoint injection, err: %v", err)
+			return
+		}
+
 		successes++
 	}
 	if successes < config.count || failures >= config.retries {
 		t.Errorf("failed to trigger failpoints enough times, err: %v", err)
 	}
 
 	return
 }
 
+func verifyClusterHealth(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster) error {
+	for i := 0; i < len(clus.Procs); i++ {
+		clusterClient, err := clientv3.New(clientv3.Config{
+			Endpoints:            clus.Procs[i].EndpointsGRPC(),
+			Logger:               zap.NewNop(),
+			DialKeepAliveTime:    1 * time.Millisecond,
+			DialKeepAliveTimeout: 5 * time.Millisecond,
+		})
+		if err != nil {
+			return fmt.Errorf("Error creating client for cluster %s: %v", clus.Procs[i].Config().Name, err)
+		}
+		defer clusterClient.Close()
+
+		cli := healthpb.NewHealthClient(clusterClient.ActiveConnection())
+		resp, err := cli.Check(ctx, &healthpb.HealthCheckRequest{})
+		if err != nil {
+			return fmt.Errorf("Error checking member %s health: %v", clus.Procs[i].Config().Name, err)
+		}
+		if resp.Status != healthpb.HealthCheckResponse_SERVING {
+			return fmt.Errorf("Member %s health status expected %s, got %s",
+				clus.Procs[i].Config().Name,
+				healthpb.HealthCheckResponse_SERVING,
+				resp.Status)
+		}
+	}
+	return nil
+}
+
 type FailpointConfig struct {
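For reference, the probe that verifyClusterHealth performs is the standard gRPC health-checking protocol (grpc_health_v1) rather than an etcd-specific RPC; an empty service name in the request asks for the overall server status. A self-contained sketch of the same probe against a single endpoint, where localhost:2379 is a placeholder address and the tight keepalive settings from the helper above are left at their defaults:

package main

import (
	"context"
	"fmt"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
	healthpb "google.golang.org/grpc/health/grpc_health_v1"
)

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Placeholder endpoint; substitute a real member's client URL.
	conn, err := grpc.DialContext(ctx, "localhost:2379",
		grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		panic(err)
	}
	defer conn.Close()

	// An empty Service field queries overall server health, mirroring the
	// empty HealthCheckRequest sent by verifyClusterHealth in the diff.
	resp, err := healthpb.NewHealthClient(conn).Check(ctx, &healthpb.HealthCheckRequest{})
	if err != nil {
		panic(err)
	}
	fmt.Println("health status:", resp.Status) // a healthy member reports SERVING
}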
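Finally, a hypothetical call site for the retry loop, showing how the FailpointConfig fields visible in the hunk (failpoint, count, retries, waitBetweenTriggers) fit together. KillFailpoint is assumed to be one of the package's Failpoint values and the numbers are illustrative; none of this appears in the commit itself:

// Same test package as triggerFailpoints, since the config fields are unexported.
triggerFailpoints(ctx, t, lg, clus, FailpointConfig{
	failpoint:           KillFailpoint, // assumption: an existing Failpoint value
	count:               1,
	retries:             3,
	waitBetweenTriggers: time.Second,
})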