pkg/expect: fix data race

Let's say there is command which outputs one line and exit with error.

There are three goroutines to acquire the lock:

1. ep.read()
2. ep.waitSaveExitErr()
3. ep.Expect()

When ep.read goroutine reads the log but it doesn't acquire the lock in
time, the ep.waitSaveExitErr acquires the lock and updates the
`exitErr`. And then ep.Expect acquires lock but it doesn't see any log
yet and then returns err.

It's hard to reproduce it in local. Add the extra sleep can reproduce it.

```diff
diff --git a/pkg/expect/expect.go b/pkg/expect/expect.go
index a512a3ce4..602bea73f 100644
--- a/pkg/expect/expect.go
+++ b/pkg/expect/expect.go
@@ -128,6 +128,7 @@ func (ep *ExpectProcess) tryReadNextLine(r *bufio.Reader) error {
        printDebugLines := os.Getenv("EXPECT_DEBUG") != ""
        l, err := r.ReadString('\n')

+       time.Sleep(10 * time.Millisecond)
        ep.mu.Lock()
        defer ep.mu.Unlock()
```

See it once in Github Action [1]. In order to fix it, the patch introduces
`readCloseCh` to wait for ep.read to get all the data and retry it.

[1]: https://github.com/etcd-io/etcd/pull/16137#issuecomment-1605838518

Signed-off-by: Wei Fu <fuweid89@gmail.com>
This commit is contained in:
Wei Fu 2023-07-04 23:08:07 +08:00
parent ac58abcf7e
commit 56edfa6e28
2 changed files with 31 additions and 1 deletions

View File

@ -44,6 +44,8 @@ type ExpectProcess struct {
fpty *os.File
wg sync.WaitGroup
readCloseCh chan struct{} // close it if async read goroutine exits
mu sync.Mutex // protects lines, count, cur, exitErr and exitCode
lines []string
count int // increment whenever new line gets added
@ -67,6 +69,7 @@ func NewExpectWithEnv(name string, args []string, env []string, serverProcessCon
args: args,
env: env,
},
readCloseCh: make(chan struct{}),
}
ep.cmd = commandFromConfig(ep.cfg)
@ -100,7 +103,10 @@ func (ep *ExpectProcess) Pid() int {
}
func (ep *ExpectProcess) read() {
defer ep.wg.Done()
defer func() {
ep.wg.Done()
close(ep.readCloseCh)
}()
defer func(fpty *os.File) {
err := fpty.Close()
if err != nil {
@ -187,9 +193,25 @@ func (ep *ExpectProcess) ExpectFunc(ctx context.Context, f func(string) bool) (s
}
}
select {
// NOTE: we wait readCloseCh for ep.read() to complete draining the log before acquring the lock.
case <-ep.readCloseCh:
case <-ctx.Done():
return "", fmt.Errorf("failed to find match string: %w", ctx.Err())
}
ep.mu.Lock()
defer ep.mu.Unlock()
// retry it since we get all the log data
for i < len(ep.lines) {
line := ep.lines[i]
i++
if f(line) {
return line, nil
}
}
lastLinesIndex := len(ep.lines) - DEBUG_LINES_TAIL
if lastLinesIndex < 0 {
lastLinesIndex = 0

View File

@ -210,3 +210,11 @@ func TestExitCodeAfterKill(t *testing.T) {
assert.Equal(t, 137, code)
assert.NoError(t, err)
}
func TestExpectForFailFastCommand(t *testing.T) {
ep, err := NewExpect("sh", "-c", `echo "curl: (59) failed setting cipher list"; exit 59`)
require.NoError(t, err)
_, err = ep.Expect("failed setting cipher list")
require.NoError(t, err)
}