etcd/pkg/testutil/leak.go
Piotr Tabor 70b5ef1d3a Fix tests flakiness: in particular TestIssue6361.
The root reason of flakes, was that server was considered as ready to
early.
In particular:
```
../../bin/etcd-2456648: {"level":"info","ts":"2021-01-11T09:56:44.474+0100","caller":"rafthttp/stream.go:274","msg":"established TCP streaming connection with remote peer","stream-writer-type":"stream Message","local-member-id":"ed5f620d34a8e61b","remote-peer-id":"ca50e9357181d758"}
../../bin/etcd-2456648: {"level":"warn","ts":"2021-01-11T09:56:49.040+0100","caller":"etcdserver/server.go:1942","msg":"failed to publish local member to cluster through raft","local-member-id":"ed5f620d34a8e61b","local-member-attributes":"{Name:infra2 ClientURLs:[http://localhost:20030]}","request-path":"/0/members/ed5f620d34a8e61b/attributes","publish-timeout":"7s","error":"etcdserver: request timed out, possibly due to connection lost"}
../../bin/etcd-2456648: {"level":"info","ts":"2021-01-11T09:56:49.049+0100","caller":"etcdserver/server.go:1921","msg":"published local member to cluster through raft","local-member-id":"ed5f620d34a8e61b","local-member-attributes":"{Name:infra2 ClientURLs:[http://localhost:20030]}","request-path":"/0/members/ed5f620d34a8e61b/attributes","cluster-id":"34f27e83b3bc2ff","publish-timeout":"7s"}
```
was taking 5s.   If this was happening concurrently with etcdctl, the
etcdctl could timeout.

The fix, requires servers to report 'ready to serve client requests' to consider them up.

Fixed also some whitelisted 'goroutines'.
2021-01-12 00:14:51 +01:00

158 lines
4.4 KiB
Go

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package testutil
import (
"fmt"
"net/http"
"os"
"regexp"
"runtime"
"sort"
"strings"
"testing"
"time"
)
/*
CheckLeakedGoroutine verifies tests do not leave any leaky
goroutines. It returns true when there are goroutines still
running(leaking) after all tests.
import "go.etcd.io/etcd/pkg/v3/testutil"
func TestMain(m *testing.M) {
testutil.MustTestMainWithLeakDetection(m)
}
func TestSample(t *testing.T) {
defer testutil.AfterTest(t)
...
}
*/
func CheckLeakedGoroutine() bool {
gs := interestingGoroutines()
if len(gs) == 0 {
return false
}
stackCount := make(map[string]int)
re := regexp.MustCompile(`\(0[0-9a-fx, ]*\)`)
for _, g := range gs {
// strip out pointer arguments in first function of stack dump
normalized := string(re.ReplaceAll([]byte(g), []byte("(...)")))
stackCount[normalized]++
}
fmt.Fprintf(os.Stderr, "Unexpected goroutines running after all test(s).\n")
for stack, count := range stackCount {
fmt.Fprintf(os.Stderr, "%d instances of:\n%s\n", count, stack)
}
return true
}
// CheckAfterTest returns an error if AfterTest would fail with an error.
// Waits for go-routines shutdown for 'd'.
func CheckAfterTest(d time.Duration) error {
http.DefaultTransport.(*http.Transport).CloseIdleConnections()
var bad string
badSubstring := map[string]string{
").writeLoop(": "a Transport",
"created by net/http/httptest.(*Server).Start": "an httptest.Server",
"timeoutHandler": "a TimeoutHandler",
"net.(*netFD).connect(": "a timing out dial",
").noteClientGone(": "a closenotifier sender",
").readLoop(": "a Transport",
".grpc": "a gRPC resource",
").sendCloseSubstream(": "a stream closing routine",
}
var stacks string
begin := time.Now()
for time.Since(begin) < d {
bad = ""
stacks = strings.Join(interestingGoroutines(), "\n\n")
for substr, what := range badSubstring {
if strings.Contains(stacks, substr) {
bad = what
}
}
if bad == "" {
return nil
}
// Bad stuff found, but goroutines might just still be
// shutting down, so give it some time.
time.Sleep(50 * time.Millisecond)
}
return fmt.Errorf("appears to have leaked %s:\n%s", bad, stacks)
}
// AfterTest is meant to run in a defer that executes after a test completes.
// It will detect common goroutine leaks, retrying in case there are goroutines
// not synchronously torn down, and fail the test if any goroutines are stuck.
func AfterTest(t *testing.T) {
if err := CheckAfterTest(300 * time.Millisecond); err != nil {
t.Errorf("Test %v", err)
}
}
func interestingGoroutines() (gs []string) {
buf := make([]byte, 2<<20)
buf = buf[:runtime.Stack(buf, true)]
for _, g := range strings.Split(string(buf), "\n\n") {
sl := strings.SplitN(g, "\n", 2)
if len(sl) != 2 {
continue
}
stack := strings.TrimSpace(sl[1])
if stack == "" ||
strings.Contains(stack, "sync.(*WaitGroup).Done") ||
strings.Contains(stack, "os.(*file).close") ||
strings.Contains(stack, "os.(*Process).Release") ||
strings.Contains(stack, "created by os/signal.init") ||
strings.Contains(stack, "runtime/panic.go") ||
strings.Contains(stack, "created by testing.RunTests") ||
strings.Contains(stack, "created by testing.runTests") ||
strings.Contains(stack, "created by testing.(*T).Run") ||
strings.Contains(stack, "testing.Main(") ||
strings.Contains(stack, "runtime.goexit") ||
strings.Contains(stack, "go.etcd.io/etcd/pkg/v3/testutil.interestingGoroutines") ||
strings.Contains(stack, "go.etcd.io/etcd/pkg/v3/logutil.(*MergeLogger).outputLoop") ||
strings.Contains(stack, "github.com/golang/glog.(*loggingT).flushDaemon") ||
strings.Contains(stack, "created by runtime.gc") ||
strings.Contains(stack, "created by text/template/parse.lex") ||
strings.Contains(stack, "runtime.MHeap_Scavenger") {
continue
}
gs = append(gs, stack)
}
sort.Strings(gs)
return gs
}
func MustCheckLeakedGoroutine() {
http.DefaultTransport.(*http.Transport).CloseIdleConnections()
CheckAfterTest(5 * time.Second)
// Let the other goroutines finalize.
runtime.Gosched()
if CheckLeakedGoroutine() {
os.Exit(1)
}
}
// MustTestMainWithLeakDetection expands standard m.Run with leaked
// goroutines detection.
func MustTestMainWithLeakDetection(m *testing.M) {
v := m.Run()
if v == 0 {
MustCheckLeakedGoroutine()
}
os.Exit(v)
}