From dad8208a4ddd46bcbab937d0c2cac5fedf5a3113 Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Mon, 5 Sep 2022 17:29:07 +0200 Subject: [PATCH] raft: avoid panics during *node tests `StartNode` runs a naked goroutine, so it's impossible to test against it in a way that will reliably produce contained test failures when assertions are hit on the `(*node).run` goroutine. This commit introduces a harness that we can use in tests to wrap this goroutine and allow it to defer errors to `*testing.T`. Note that tests of `Node` still need to be architected carefully since it's easy to produce a deadlock in them should things not go exactly as planned. Signed-off-by: Tobias Grieger --- raft/node.go | 18 ++++--- raft/node_test.go | 7 +-- raft/node_util_test.go | 109 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 12 deletions(-) create mode 100644 raft/node_util_test.go diff --git a/raft/node.go b/raft/node.go index d374b6c0c..381000621 100644 --- a/raft/node.go +++ b/raft/node.go @@ -211,11 +211,7 @@ type Peer struct { Context []byte } -// StartNode returns a new Node given configuration and a list of raft peers. -// It appends a ConfChangeAddNode entry for each given peer to the initial log. -// -// Peers must not be zero length; call RestartNode in that case. -func StartNode(c *Config, peers []Peer) Node { +func setupNode(c *Config, peers []Peer) *node { if len(peers) == 0 { panic("no peers given; use RestartNode instead") } @@ -229,11 +225,19 @@ func StartNode(c *Config, peers []Peer) Node { } n := newNode(rn) - - go n.run() return &n } +// StartNode returns a new Node given configuration and a list of raft peers. +// It appends a ConfChangeAddNode entry for each given peer to the initial log. +// +// Peers must not be zero length; call RestartNode in that case. +func StartNode(c *Config, peers []Peer) Node { + n := setupNode(c, peers) + go n.run() + return n +} + // RestartNode is similar to StartNode but does not take a list of peers. // The current membership of the cluster will be restored from the Storage. // If the caller has an existing state machine, pass in the last log index that diff --git a/raft/node_test.go b/raft/node_test.go index 2c2ff8018..b732174f1 100644 --- a/raft/node_test.go +++ b/raft/node_test.go @@ -740,9 +740,6 @@ func TestNodeRestartFromSnapshot(t *testing.T) { } func TestNodeAdvance(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - storage := NewMemoryStorage() c := &Config{ ID: 1, @@ -752,8 +749,8 @@ func TestNodeAdvance(t *testing.T) { MaxSizePerMsg: noLimit, MaxInflightMsgs: 256, } - n := StartNode(c, []Peer{{ID: 1}}) - defer n.Stop() + ctx, cancel, n := newNodeTestHarness(t, context.Background(), c, Peer{ID: 1}) + defer cancel() rd := <-n.Ready() storage.Append(rd.Entries) n.Advance() diff --git a/raft/node_util_test.go b/raft/node_util_test.go new file mode 100644 index 000000000..fb3473d12 --- /dev/null +++ b/raft/node_util_test.go @@ -0,0 +1,109 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package raft + +import ( + "context" + "fmt" + "testing" + "time" +) + +type nodeTestHarness struct { + *node + t *testing.T +} + +func (l *nodeTestHarness) Debug(v ...interface{}) { + l.t.Log(v...) +} + +func (l *nodeTestHarness) Debugf(format string, v ...interface{}) { + l.t.Logf(format, v...) +} + +func (l *nodeTestHarness) Error(v ...interface{}) { + l.t.Error(v...) +} + +func (l *nodeTestHarness) Errorf(format string, v ...interface{}) { + l.t.Errorf(format, v...) +} + +func (l *nodeTestHarness) Info(v ...interface{}) { + l.t.Log(v...) +} + +func (l *nodeTestHarness) Infof(format string, v ...interface{}) { + l.t.Logf(format, v...) +} + +func (l *nodeTestHarness) Warning(v ...interface{}) { + l.t.Log(v...) +} + +func (l *nodeTestHarness) Warningf(format string, v ...interface{}) { + l.t.Logf(format, v...) +} + +func (l *nodeTestHarness) Fatal(v ...interface{}) { + l.t.Error(v...) + panic(v) +} + +func (l *nodeTestHarness) Fatalf(format string, v ...interface{}) { + l.t.Errorf(format, v...) + panic(fmt.Sprintf(format, v...)) +} + +func (l *nodeTestHarness) Panic(v ...interface{}) { + l.t.Log(v...) + panic(v) +} + +func (l *nodeTestHarness) Panicf(format string, v ...interface{}) { + l.t.Errorf(format, v...) + panic(fmt.Sprintf(format, v...)) +} + +func newNodeTestHarness(t *testing.T, ctx context.Context, cfg *Config, peers ...Peer) (_ context.Context, cancel func(), _ *nodeTestHarness) { + // Wrap context in a 10s timeout to make tests more robust. Otherwise, + // it's likely that deadlock will occur unless Node behaves exactly as + // expected - when you expect a Ready and start waiting on the channel + // but no Ready ever shows up, for example. + ctx, cancel = context.WithTimeout(ctx, 10*time.Second) + var n *node + if len(peers) > 0 { + n = setupNode(cfg, peers) + } else { + rn, err := NewRawNode(cfg) + if err != nil { + t.Fatal(err) + } + nn := newNode(rn) + n = &nn + } + go func() { + defer func() { + if r := recover(); r != nil { + t.Error(r) + } + }() + defer cancel() + defer n.Stop() + n.run() + }() + return ctx, cancel, &nodeTestHarness{node: n, t: t} +}