From 8a88660262f2fc1e562ded0d7225fd79c9962c5b Mon Sep 17 00:00:00 2001 From: Wei Fu Date: Sun, 27 Nov 2022 21:37:50 +0800 Subject: [PATCH] client/pkg/transport: deflake TestWriteReadTimeoutListener There is data race on `stop` channel. After verify write-timeout successfully, the case won't wait for `blocker` to receive close signal from `stop` channel. If the new `blocker`, which is to read-timeout verifier, get dial's result immediately, the new `blocker` might fetch the message from `stop` channel before old one and then close the connection, which causes that the `conn.Read` returns `EOF` when it reads data. How to reproduce this in linux devbox? Use `taskset` to limit the test process in one-cpu. ```bash cd ./client/pkg/transport go test -c -o /tmp/test --race=true ./ taskset -c 0 /tmp/test -test.run TestWriteReadTimeoutListener -test.v -test.cpu 4 -test.count=10000 -test.failfast ``` To fix this, suggest to use seperate `stop` channel to prevent from data race. Signed-off-by: Wei Fu --- client/pkg/transport/timeout_listener_test.go | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/client/pkg/transport/timeout_listener_test.go b/client/pkg/transport/timeout_listener_test.go index 0c4f20837..d2329b4db 100644 --- a/client/pkg/transport/timeout_listener_test.go +++ b/client/pkg/transport/timeout_listener_test.go @@ -47,22 +47,23 @@ func TestWriteReadTimeoutListener(t *testing.T) { writeTimeout: 10 * time.Millisecond, readTimeout: 10 * time.Millisecond, } - stop := make(chan struct{}, 1) - blocker := func() { + blocker := func(stopCh <-chan struct{}) { conn, derr := net.Dial("tcp", ln.Addr().String()) if derr != nil { t.Errorf("unexpected dail error: %v", derr) } defer conn.Close() // block the receiver until the writer timeout - <-stop + <-stopCh } - go blocker() + + writerStopCh := make(chan struct{}, 1) + go blocker(writerStopCh) conn, err := wln.Accept() if err != nil { - stop <- struct{}{} + writerStopCh <- struct{}{} t.Fatalf("unexpected accept error: %v", err) } defer conn.Close() @@ -79,20 +80,21 @@ func TestWriteReadTimeoutListener(t *testing.T) { case <-done: // It waits 1s more to avoid delay in low-end system. case <-time.After(wln.writeTimeout*10 + time.Second): - stop <- struct{}{} + writerStopCh <- struct{}{} t.Fatal("wait timeout") } if operr, ok := err.(*net.OpError); !ok || operr.Op != "write" || !operr.Timeout() { t.Errorf("err = %v, want write i/o timeout error", err) } - stop <- struct{}{} + writerStopCh <- struct{}{} - go blocker() + readerStopCh := make(chan struct{}, 1) + go blocker(readerStopCh) conn, err = wln.Accept() if err != nil { - stop <- struct{}{} + readerStopCh <- struct{}{} t.Fatalf("unexpected accept error: %v", err) } buf := make([]byte, 10) @@ -105,12 +107,12 @@ func TestWriteReadTimeoutListener(t *testing.T) { select { case <-done: case <-time.After(wln.readTimeout * 10): - stop <- struct{}{} + readerStopCh <- struct{}{} t.Fatal("wait timeout") } if operr, ok := err.(*net.OpError); !ok || operr.Op != "read" || !operr.Timeout() { t.Errorf("err = %v, want write i/o timeout error", err) } - stop <- struct{}{} + readerStopCh <- struct{}{} }