raft: avoid panics during *node tests

`StartNode` runs a naked goroutine, so it's impossible to test against
it in a way that will reliably produce contained test failures when
assertions are hit on the `(*node).run` goroutine.

This commit introduces a harness that we can use in tests to wrap
this goroutine and allow it to defer errors to `*testing.T`.

Note that tests of `Node` still need to be architected carefully
since it's easy to produce a deadlock in them should things not
go exactly as planned.

Signed-off-by: Tobias Grieger <tobias.b.grieger@gmail.com>
This commit is contained in:
Tobias Grieger 2022-09-05 17:29:07 +02:00
parent 169f4c3cc7
commit dad8208a4d
3 changed files with 122 additions and 12 deletions

View File

@ -211,11 +211,7 @@ type Peer struct {
Context []byte
}
// StartNode returns a new Node given configuration and a list of raft peers.
// It appends a ConfChangeAddNode entry for each given peer to the initial log.
//
// Peers must not be zero length; call RestartNode in that case.
func StartNode(c *Config, peers []Peer) Node {
func setupNode(c *Config, peers []Peer) *node {
if len(peers) == 0 {
panic("no peers given; use RestartNode instead")
}
@ -229,11 +225,19 @@ func StartNode(c *Config, peers []Peer) Node {
}
n := newNode(rn)
go n.run()
return &n
}
// StartNode returns a new Node given configuration and a list of raft peers.
// It appends a ConfChangeAddNode entry for each given peer to the initial log.
//
// Peers must not be zero length; call RestartNode in that case.
func StartNode(c *Config, peers []Peer) Node {
n := setupNode(c, peers)
go n.run()
return n
}
// RestartNode is similar to StartNode but does not take a list of peers.
// The current membership of the cluster will be restored from the Storage.
// If the caller has an existing state machine, pass in the last log index that

View File

@ -740,9 +740,6 @@ func TestNodeRestartFromSnapshot(t *testing.T) {
}
func TestNodeAdvance(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
storage := NewMemoryStorage()
c := &Config{
ID: 1,
@ -752,8 +749,8 @@ func TestNodeAdvance(t *testing.T) {
MaxSizePerMsg: noLimit,
MaxInflightMsgs: 256,
}
n := StartNode(c, []Peer{{ID: 1}})
defer n.Stop()
ctx, cancel, n := newNodeTestHarness(t, context.Background(), c, Peer{ID: 1})
defer cancel()
rd := <-n.Ready()
storage.Append(rd.Entries)
n.Advance()

109
raft/node_util_test.go Normal file
View File

@ -0,0 +1,109 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package raft
import (
"context"
"fmt"
"testing"
"time"
)
type nodeTestHarness struct {
*node
t *testing.T
}
func (l *nodeTestHarness) Debug(v ...interface{}) {
l.t.Log(v...)
}
func (l *nodeTestHarness) Debugf(format string, v ...interface{}) {
l.t.Logf(format, v...)
}
func (l *nodeTestHarness) Error(v ...interface{}) {
l.t.Error(v...)
}
func (l *nodeTestHarness) Errorf(format string, v ...interface{}) {
l.t.Errorf(format, v...)
}
func (l *nodeTestHarness) Info(v ...interface{}) {
l.t.Log(v...)
}
func (l *nodeTestHarness) Infof(format string, v ...interface{}) {
l.t.Logf(format, v...)
}
func (l *nodeTestHarness) Warning(v ...interface{}) {
l.t.Log(v...)
}
func (l *nodeTestHarness) Warningf(format string, v ...interface{}) {
l.t.Logf(format, v...)
}
func (l *nodeTestHarness) Fatal(v ...interface{}) {
l.t.Error(v...)
panic(v)
}
func (l *nodeTestHarness) Fatalf(format string, v ...interface{}) {
l.t.Errorf(format, v...)
panic(fmt.Sprintf(format, v...))
}
func (l *nodeTestHarness) Panic(v ...interface{}) {
l.t.Log(v...)
panic(v)
}
func (l *nodeTestHarness) Panicf(format string, v ...interface{}) {
l.t.Errorf(format, v...)
panic(fmt.Sprintf(format, v...))
}
func newNodeTestHarness(t *testing.T, ctx context.Context, cfg *Config, peers ...Peer) (_ context.Context, cancel func(), _ *nodeTestHarness) {
// Wrap context in a 10s timeout to make tests more robust. Otherwise,
// it's likely that deadlock will occur unless Node behaves exactly as
// expected - when you expect a Ready and start waiting on the channel
// but no Ready ever shows up, for example.
ctx, cancel = context.WithTimeout(ctx, 10*time.Second)
var n *node
if len(peers) > 0 {
n = setupNode(cfg, peers)
} else {
rn, err := NewRawNode(cfg)
if err != nil {
t.Fatal(err)
}
nn := newNode(rn)
n = &nn
}
go func() {
defer func() {
if r := recover(); r != nil {
t.Error(r)
}
}()
defer cancel()
defer n.Stop()
n.run()
}()
return ctx, cancel, &nodeTestHarness{node: n, t: t}
}