mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
Fix TestSnapshotV3RestoreMultiMemberAdd flakes (leaks)
- most important: unix's socket transport should not keep idle
connections. For top-level Transport we close them using:
f3c518025e/server/etcdserver/api/rafthttp/transport.go (L226)
but currently we don't have access to close them witing the nest (unix) transport. Short idle deadline is good enough.
- Use dialContext (instead of dial) to make sure context is passed down the stack
- Make sure Context is cancelled as soon as the operation is done in pipeline
- nit: use dedicated method to yeld goroutines.
Tested with:
```
d=$(date +"%Y%m%d_%H%M")
(cd tests && go test --timeout=60m ./integration/snapshot -run TestSnapshotV3RestoreMultiMemberAdd -v --count=180 2>&1 | tee log_${d}.log)
```
There were transports & cmux leaked:
```
leak.go:118: Test appears to have leaked a Transport:
internal/poll.runtime_pollWait(0x7f6c5c3784c8, 0x72, 0xffffffffffffffff)
/usr/lib/google-golang/src/runtime/netpoll.go:222 +0x55
internal/poll.(*pollDesc).wait(0xc003296298, 0x72, 0x0, 0x18, 0xffffffffffffffff)
/usr/lib/google-golang/src/internal/poll/fd_poll_runtime.go:87 +0x45
internal/poll.(*pollDesc).waitRead(...)
/usr/lib/google-golang/src/internal/poll/fd_poll_runtime.go:92
internal/poll.(*FD).Read(0xc003296280, 0xc0031f60a8, 0x18, 0x18, 0x0, 0x0, 0x0)
/usr/lib/google-golang/src/internal/poll/fd_unix.go:166 +0x1d5
net.(*netFD).Read(0xc003296280, 0xc0031f60a8, 0x18, 0x18, 0x18, 0xc0009056e2, 0x203000)
/usr/lib/google-golang/src/net/fd_posix.go:55 +0x4f
net.(*conn).Read(0xc000010258, 0xc0031f60a8, 0x18, 0x18, 0x0, 0x0, 0x0)
/usr/lib/google-golang/src/net/net.go:183 +0x91
github.com/soheilhy/cmux.(*bufferedReader).Read(0xc0003d24e0, 0xc0031f60a8, 0x18, 0x18, 0xc0003d24d0, 0xc0009056e2, 0xc000278400)
/home/ptab/private/golang/pkg/mod/github.com/soheilhy/cmux@v0.1.5/buffer.go:53 +0x12d
github.com/soheilhy/cmux.hasHTTP2Preface(0x1367e20, 0xc0003d24e0, 0x7f6c5c699f40)
/home/ptab/private/golang/pkg/mod/github.com/soheilhy/cmux@v0.1.5/matchers.go:195 +0x8a
github.com/soheilhy/cmux.matchersToMatchWriters.func1(0x7f6c5c699f40, 0xc000010258, 0x1367e20, 0xc0003d24e0, 0xc000010258)
/home/ptab/private/golang/pkg/mod/github.com/soheilhy/cmux@v0.1.5/cmux.go:128 +0x39
github.com/soheilhy/cmux.(*cMux).serve(0xc003228690, 0x138c410, 0xc000010258, 0xc00327f740, 0xc0059ba860)
/home/ptab/private/golang/pkg/mod/github.com/soheilhy/cmux@v0.1.5/cmux.go:192 +0x1e7
created by github.com/soheilhy/cmux.(*cMux).Serve
/home/ptab/private/golang/pkg/mod/github.com/soheilhy/cmux@v0.1.5/cmux.go:179 +0x191
internal/poll.runtime_pollWait(0x7f6c5c60f3f0, 0x72, 0xffffffffffffffff)
/usr/lib/google-golang/src/runtime/netpoll.go:222 +0x55
internal/poll.(*pollDesc).wait(0xc000d53018, 0x72, 0x1000, 0x1000, 0xffffffffffffffff)
/usr/lib/google-golang/src/internal/poll/fd_poll_runtime.go:87 +0x45
internal/poll.(*pollDesc).waitRead(...)
/usr/lib/google-golang/src/internal/poll/fd_poll_runtime.go:92
internal/poll.(*FD).Read(0xc000d53000, 0xc000cfd000, 0x1000, 0x1000, 0x0, 0x0, 0x0)
/usr/lib/google-golang/src/internal/poll/fd_unix.go:166 +0x1d5
net.(*netFD).Read(0xc000d53000, 0xc000cfd000, 0x1000, 0x1000, 0x3, 0x3, 0x1000000000001)
/usr/lib/google-golang/src/net/fd_posix.go:55 +0x4f
net.(*conn).Read(0xc00031a570, 0xc000cfd000, 0x1000, 0x1000, 0x0, 0x0, 0x0)
/usr/lib/google-golang/src/net/net.go:183 +0x91
net/http.(*persistConn).Read(0xc00093b320, 0xc000cfd000, 0x1000, 0x1000, 0x577750, 0x60, 0x0)
/usr/lib/google-golang/src/net/http/transport.go:1933 +0x77
bufio.(*Reader).fill(0xc005702fc0)
/usr/lib/google-golang/src/bufio/bufio.go:101 +0x108
bufio.(*Reader).Peek(0xc005702fc0, 0x1, 0xc00077c660, 0xc003b082a0, 0xc000d08de0, 0x5ae586, 0x11dd6c0)
/usr/lib/google-golang/src/bufio/bufio.go:139 +0x4f
net/http.(*persistConn).readLoop(0xc00093b320)
/usr/lib/google-golang/src/net/http/transport.go:2094 +0x1a8
created by net/http.(*Transport).dialConn
/usr/lib/google-golang/src/net/http/transport.go:1754 +0xdaa
net/http.(*persistConn).writeLoop(0xc00093b320)
/usr/lib/google-golang/src/net/http/transport.go:2393 +0xf7
created by net/http.(*Transport).dialConn
/usr/lib/google-golang/src/net/http/transport.go:1755 +0xdcf
sync.runtime_Semacquire(0xc0059ba868)
/usr/lib/google-golang/src/runtime/sema.go:56 +0x45
sync.(*WaitGroup).Wait(0xc0059ba860)
/usr/lib/google-golang/src/sync/waitgroup.go:130 +0x65
github.com/soheilhy/cmux.(*cMux).Serve.func1(0xc003228690, 0xc0059ba860)
/home/ptab/private/golang/pkg/mod/github.com/soheilhy/cmux@v0.1.5/cmux.go:158 +0x56
github.com/soheilhy/cmux.(*cMux).Serve(0xc003228690, 0x13698c0, 0xc00377a0f0)
/home/ptab/private/golang/pkg/mod/github.com/soheilhy/cmux@v0.1.5/cmux.go:173 +0x115
go.etcd.io/etcd/server/v3/embed.(*Etcd).servePeers.func1(0xc0007cc360, 0x122b75f)
/home/ptab/corp/etcd/server/embed/etcd.go:518 +0x2b9
go.etcd.io/etcd/server/v3/embed.(*Etcd).servePeers.func3(0xc00036d080, 0xc0059330a0)
/home/ptab/corp/etcd/server/embed/etcd.go:549 +0x182
created by go.etcd.io/etcd/server/v3/embed.(*Etcd).servePeers
/home/ptab/corp/etcd/server/embed/etcd.go:543 +0x73a
--- FAIL: TestSnapshotV3RestoreMultiMemberAdd (17.74s)
```
This commit is contained in:
parent
f3c518025e
commit
17b982382e
@ -15,6 +15,7 @@
|
|||||||
package transport
|
package transport
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
@ -31,11 +32,11 @@ func NewTransport(info TLSInfo, dialtimeoutd time.Duration) (*http.Transport, er
|
|||||||
|
|
||||||
t := &http.Transport{
|
t := &http.Transport{
|
||||||
Proxy: http.ProxyFromEnvironment,
|
Proxy: http.ProxyFromEnvironment,
|
||||||
Dial: (&net.Dialer{
|
DialContext: (&net.Dialer{
|
||||||
Timeout: dialtimeoutd,
|
Timeout: dialtimeoutd,
|
||||||
// value taken from http.DefaultTransport
|
// value taken from http.DefaultTransport
|
||||||
KeepAlive: 30 * time.Second,
|
KeepAlive: 30 * time.Second,
|
||||||
}).Dial,
|
}).DialContext,
|
||||||
// value taken from http.DefaultTransport
|
// value taken from http.DefaultTransport
|
||||||
TLSHandshakeTimeout: 10 * time.Second,
|
TLSHandshakeTimeout: 10 * time.Second,
|
||||||
TLSClientConfig: cfg,
|
TLSClientConfig: cfg,
|
||||||
@ -45,15 +46,20 @@ func NewTransport(info TLSInfo, dialtimeoutd time.Duration) (*http.Transport, er
|
|||||||
Timeout: dialtimeoutd,
|
Timeout: dialtimeoutd,
|
||||||
KeepAlive: 30 * time.Second,
|
KeepAlive: 30 * time.Second,
|
||||||
}
|
}
|
||||||
dial := func(net, addr string) (net.Conn, error) {
|
|
||||||
return dialer.Dial("unix", addr)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
dialContext := func(ctx context.Context, net, addr string) (net.Conn, error) {
|
||||||
|
return dialer.DialContext(ctx, "unix", addr)
|
||||||
|
}
|
||||||
tu := &http.Transport{
|
tu := &http.Transport{
|
||||||
Proxy: http.ProxyFromEnvironment,
|
Proxy: http.ProxyFromEnvironment,
|
||||||
Dial: dial,
|
DialContext: dialContext,
|
||||||
TLSHandshakeTimeout: 10 * time.Second,
|
TLSHandshakeTimeout: 10 * time.Second,
|
||||||
TLSClientConfig: cfg,
|
TLSClientConfig: cfg,
|
||||||
|
// Cost of reopening connection on sockets is low, and they are mostly used in testing.
|
||||||
|
// Long living unix-transport connections were leading to 'leak' test flakes.
|
||||||
|
// Alternativly the returned Transport (t) should override CloseIdleConnections to
|
||||||
|
// forward it to 'tu' as well.
|
||||||
|
IdleConnTimeout: time.Microsecond,
|
||||||
}
|
}
|
||||||
ut := &unixTransport{tu}
|
ut := &unixTransport{tu}
|
||||||
|
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
package proxy
|
package proxy
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
mrand "math/rand"
|
mrand "math/rand"
|
||||||
@ -295,6 +296,7 @@ func (s *server) To() string {
|
|||||||
func (s *server) listenAndServe() {
|
func (s *server) listenAndServe() {
|
||||||
defer s.closeWg.Done()
|
defer s.closeWg.Done()
|
||||||
|
|
||||||
|
ctx := context.Background()
|
||||||
s.lg.Info("proxy is listening on", zap.String("from", s.From()))
|
s.lg.Info("proxy is listening on", zap.String("from", s.From()))
|
||||||
close(s.readyc)
|
close(s.readyc)
|
||||||
|
|
||||||
@ -380,7 +382,7 @@ func (s *server) listenAndServe() {
|
|||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
out, err = tp.Dial(s.to.Scheme, s.to.Host)
|
out, err = tp.DialContext(ctx, s.to.Scheme, s.to.Host)
|
||||||
} else {
|
} else {
|
||||||
out, err = net.Dial(s.to.Scheme, s.to.Host)
|
out, err = net.Dial(s.to.Scheme, s.to.Host)
|
||||||
}
|
}
|
||||||
|
@ -16,6 +16,7 @@ package proxy
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"context"
|
||||||
"crypto/tls"
|
"crypto/tls"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
@ -615,7 +616,7 @@ func send(t *testing.T, data []byte, scheme, addr string, tlsInfo transport.TLSI
|
|||||||
if terr != nil {
|
if terr != nil {
|
||||||
t.Fatal(terr)
|
t.Fatal(terr)
|
||||||
}
|
}
|
||||||
out, err = tp.Dial(scheme, addr)
|
out, err = tp.DialContext(context.Background(), scheme, addr)
|
||||||
} else {
|
} else {
|
||||||
out, err = net.Dial(scheme, addr)
|
out, err = net.Dial(scheme, addr)
|
||||||
}
|
}
|
||||||
|
@ -19,6 +19,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
"runtime"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -139,6 +140,7 @@ func (p *pipeline) post(data []byte) (err error) {
|
|||||||
go func() {
|
go func() {
|
||||||
select {
|
select {
|
||||||
case <-done:
|
case <-done:
|
||||||
|
cancel()
|
||||||
case <-p.stopc:
|
case <-p.stopc:
|
||||||
waitSchedule()
|
waitSchedule()
|
||||||
cancel()
|
cancel()
|
||||||
@ -173,4 +175,4 @@ func (p *pipeline) post(data []byte) (err error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// waitSchedule waits other goroutines to be scheduled for a while
|
// waitSchedule waits other goroutines to be scheduled for a while
|
||||||
func waitSchedule() { time.Sleep(time.Millisecond) }
|
func waitSchedule() { runtime.Gosched() }
|
||||||
|
Loading…
x
Reference in New Issue
Block a user