mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00

The root reason of flakes, was that server was considered as ready to early. In particular: ``` ../../bin/etcd-2456648: {"level":"info","ts":"2021-01-11T09:56:44.474+0100","caller":"rafthttp/stream.go:274","msg":"established TCP streaming connection with remote peer","stream-writer-type":"stream Message","local-member-id":"ed5f620d34a8e61b","remote-peer-id":"ca50e9357181d758"} ../../bin/etcd-2456648: {"level":"warn","ts":"2021-01-11T09:56:49.040+0100","caller":"etcdserver/server.go:1942","msg":"failed to publish local member to cluster through raft","local-member-id":"ed5f620d34a8e61b","local-member-attributes":"{Name:infra2 ClientURLs:[http://localhost:20030]}","request-path":"/0/members/ed5f620d34a8e61b/attributes","publish-timeout":"7s","error":"etcdserver: request timed out, possibly due to connection lost"} ../../bin/etcd-2456648: {"level":"info","ts":"2021-01-11T09:56:49.049+0100","caller":"etcdserver/server.go:1921","msg":"published local member to cluster through raft","local-member-id":"ed5f620d34a8e61b","local-member-attributes":"{Name:infra2 ClientURLs:[http://localhost:20030]}","request-path":"/0/members/ed5f620d34a8e61b/attributes","cluster-id":"34f27e83b3bc2ff","publish-timeout":"7s"} ``` was taking 5s. If this was happening concurrently with etcdctl, the etcdctl could timeout. The fix, requires servers to report 'ready to serve client requests' to consider them up. Fixed also some whitelisted 'goroutines'.
158 lines
4.4 KiB
Go
158 lines
4.4 KiB
Go
// Copyright 2013 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package testutil
|
|
|
|
import (
|
|
"fmt"
|
|
"net/http"
|
|
"os"
|
|
"regexp"
|
|
"runtime"
|
|
"sort"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
/*
|
|
CheckLeakedGoroutine verifies tests do not leave any leaky
|
|
goroutines. It returns true when there are goroutines still
|
|
running(leaking) after all tests.
|
|
|
|
import "go.etcd.io/etcd/pkg/v3/testutil"
|
|
|
|
func TestMain(m *testing.M) {
|
|
testutil.MustTestMainWithLeakDetection(m)
|
|
}
|
|
|
|
func TestSample(t *testing.T) {
|
|
defer testutil.AfterTest(t)
|
|
...
|
|
}
|
|
|
|
*/
|
|
func CheckLeakedGoroutine() bool {
|
|
gs := interestingGoroutines()
|
|
if len(gs) == 0 {
|
|
return false
|
|
}
|
|
|
|
stackCount := make(map[string]int)
|
|
re := regexp.MustCompile(`\(0[0-9a-fx, ]*\)`)
|
|
for _, g := range gs {
|
|
// strip out pointer arguments in first function of stack dump
|
|
normalized := string(re.ReplaceAll([]byte(g), []byte("(...)")))
|
|
stackCount[normalized]++
|
|
}
|
|
|
|
fmt.Fprintf(os.Stderr, "Unexpected goroutines running after all test(s).\n")
|
|
for stack, count := range stackCount {
|
|
fmt.Fprintf(os.Stderr, "%d instances of:\n%s\n", count, stack)
|
|
}
|
|
return true
|
|
}
|
|
|
|
// CheckAfterTest returns an error if AfterTest would fail with an error.
|
|
// Waits for go-routines shutdown for 'd'.
|
|
func CheckAfterTest(d time.Duration) error {
|
|
http.DefaultTransport.(*http.Transport).CloseIdleConnections()
|
|
var bad string
|
|
badSubstring := map[string]string{
|
|
").writeLoop(": "a Transport",
|
|
"created by net/http/httptest.(*Server).Start": "an httptest.Server",
|
|
"timeoutHandler": "a TimeoutHandler",
|
|
"net.(*netFD).connect(": "a timing out dial",
|
|
").noteClientGone(": "a closenotifier sender",
|
|
").readLoop(": "a Transport",
|
|
".grpc": "a gRPC resource",
|
|
").sendCloseSubstream(": "a stream closing routine",
|
|
}
|
|
|
|
var stacks string
|
|
begin := time.Now()
|
|
for time.Since(begin) < d {
|
|
bad = ""
|
|
stacks = strings.Join(interestingGoroutines(), "\n\n")
|
|
for substr, what := range badSubstring {
|
|
if strings.Contains(stacks, substr) {
|
|
bad = what
|
|
}
|
|
}
|
|
if bad == "" {
|
|
return nil
|
|
}
|
|
// Bad stuff found, but goroutines might just still be
|
|
// shutting down, so give it some time.
|
|
time.Sleep(50 * time.Millisecond)
|
|
}
|
|
return fmt.Errorf("appears to have leaked %s:\n%s", bad, stacks)
|
|
}
|
|
|
|
// AfterTest is meant to run in a defer that executes after a test completes.
|
|
// It will detect common goroutine leaks, retrying in case there are goroutines
|
|
// not synchronously torn down, and fail the test if any goroutines are stuck.
|
|
func AfterTest(t *testing.T) {
|
|
if err := CheckAfterTest(300 * time.Millisecond); err != nil {
|
|
t.Errorf("Test %v", err)
|
|
}
|
|
}
|
|
|
|
func interestingGoroutines() (gs []string) {
|
|
buf := make([]byte, 2<<20)
|
|
buf = buf[:runtime.Stack(buf, true)]
|
|
for _, g := range strings.Split(string(buf), "\n\n") {
|
|
sl := strings.SplitN(g, "\n", 2)
|
|
if len(sl) != 2 {
|
|
continue
|
|
}
|
|
stack := strings.TrimSpace(sl[1])
|
|
if stack == "" ||
|
|
strings.Contains(stack, "sync.(*WaitGroup).Done") ||
|
|
strings.Contains(stack, "os.(*file).close") ||
|
|
strings.Contains(stack, "os.(*Process).Release") ||
|
|
strings.Contains(stack, "created by os/signal.init") ||
|
|
strings.Contains(stack, "runtime/panic.go") ||
|
|
strings.Contains(stack, "created by testing.RunTests") ||
|
|
strings.Contains(stack, "created by testing.runTests") ||
|
|
strings.Contains(stack, "created by testing.(*T).Run") ||
|
|
strings.Contains(stack, "testing.Main(") ||
|
|
strings.Contains(stack, "runtime.goexit") ||
|
|
strings.Contains(stack, "go.etcd.io/etcd/pkg/v3/testutil.interestingGoroutines") ||
|
|
strings.Contains(stack, "go.etcd.io/etcd/pkg/v3/logutil.(*MergeLogger).outputLoop") ||
|
|
strings.Contains(stack, "github.com/golang/glog.(*loggingT).flushDaemon") ||
|
|
strings.Contains(stack, "created by runtime.gc") ||
|
|
strings.Contains(stack, "created by text/template/parse.lex") ||
|
|
strings.Contains(stack, "runtime.MHeap_Scavenger") {
|
|
continue
|
|
}
|
|
gs = append(gs, stack)
|
|
}
|
|
sort.Strings(gs)
|
|
return gs
|
|
}
|
|
|
|
func MustCheckLeakedGoroutine() {
|
|
http.DefaultTransport.(*http.Transport).CloseIdleConnections()
|
|
|
|
CheckAfterTest(5 * time.Second)
|
|
|
|
// Let the other goroutines finalize.
|
|
runtime.Gosched()
|
|
|
|
if CheckLeakedGoroutine() {
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
|
|
// MustTestMainWithLeakDetection expands standard m.Run with leaked
|
|
// goroutines detection.
|
|
func MustTestMainWithLeakDetection(m *testing.M) {
|
|
v := m.Run()
|
|
if v == 0 {
|
|
MustCheckLeakedGoroutine()
|
|
}
|
|
os.Exit(v)
|
|
}
|