mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
etcdmain: close client conns when it exceeds limit
This solves the problem that etcd may exit fatally because its critical path cannot obtain a file descriptor when the number of clients is too large. The PR lets the client listener close client connections immediately after they are accepted when the file descriptor usage in the process reaches a pre-set limit, which ensures that the internal critical path can always get a file descriptor when it needs one. When there are tons of clients connecting to the server, the original behavior is like this: ``` 2015/08/4 16:42:08 etcdserver: cannot monitor file descriptor usage (open /proc/self/fd: too many open files) 2015/08/4 16:42:33 etcdserver: failed to purge snap file open default2.etcd/member/snap: too many open files [halted] ``` The current behavior is like this: ``` 2015/08/6 19:05:25 transport: accept error: closing connection, exceed file descriptor usage limitation (fd limit=874) 2015/08/6 19:05:25 transport: accept error: closing connection, exceed file descriptor usage limitation (fd limit=874) 2015/08/6 19:05:26 transport: accept error: closing connection, exceed file descriptor usage limitation (fd limit=874) 2015/08/6 19:05:27 transport: accept error: closing connection, exceed file descriptor usage limitation (fd limit=874) 2015/08/6 19:05:28 transport: accept error: closing connection, exceed file descriptor usage limitation (fd limit=874) 2015/08/6 19:05:28 etcdserver: 80% of the file descriptor limit is used [used = 873, limit = 1024] ``` This is only available on Linux today because pkg/runtime only has Linux support.
This commit is contained in:
parent
219ed1695b
commit
97923ca3fc
@ -33,6 +33,7 @@ import (
|
||||
"github.com/coreos/etcd/pkg/cors"
|
||||
"github.com/coreos/etcd/pkg/fileutil"
|
||||
"github.com/coreos/etcd/pkg/osutil"
|
||||
runtimeutil "github.com/coreos/etcd/pkg/runtime"
|
||||
"github.com/coreos/etcd/pkg/transport"
|
||||
"github.com/coreos/etcd/pkg/types"
|
||||
"github.com/coreos/etcd/proxy"
|
||||
@ -49,6 +50,18 @@ var plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "etcdmain")
|
||||
const (
|
||||
// the owner can make/remove files inside the directory
|
||||
privateDirMode = 0700
|
||||
|
||||
// internal fd usage includes disk usage and transport usage.
|
||||
// To read/write snapshot, snap pkg needs 1. In normal case, wal pkg needs
|
||||
// at most 2 to read/lock/write WALs. One case where it needs 2 is to
|
||||
// read all logs after some snapshot index, which is located at the end of
|
||||
// the second last and the head of the last. For purging, it needs to read
|
||||
// directory, so it needs 1. For fd monitor, it needs 1.
|
||||
// For transport, rafthttp builds two long-polling connections and at most
|
||||
// four temporary connections with each member. There are at most 9 members
|
||||
// in a cluster, so it should reserve 96.
|
||||
// For safety, we set the total reserved number to 150.
|
||||
reservedInternalFDNum = 150
|
||||
)
|
||||
|
||||
var (
|
||||
@ -188,6 +201,12 @@ func startEtcd(cfg *config) (<-chan struct{}, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if fdLimit, err := runtimeutil.FDLimit(); err == nil {
|
||||
if fdLimit <= reservedInternalFDNum {
|
||||
plog.Fatalf("file descriptor limit[%d] of etcd process is too low, and should be set higher than %d to ensure internal usage", fdLimit, reservedInternalFDNum)
|
||||
}
|
||||
l = &transport.LimitedConnListener{Listener: l, RuntimeFDLimit: fdLimit - reservedInternalFDNum}
|
||||
}
|
||||
|
||||
urlStr := u.String()
|
||||
plog.Info("listening for client requests on ", urlStr)
|
||||
|
55
pkg/transport/limited_conn_listener.go
Normal file
55
pkg/transport/limited_conn_listener.go
Normal file
@ -0,0 +1,55 @@
|
||||
// Copyright 2015 CoreOS, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package transport
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"net"
|
||||
|
||||
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/coreos/pkg/capnslog"
|
||||
"github.com/coreos/etcd/pkg/runtime"
|
||||
)
|
||||
|
||||
var plog = capnslog.NewPackageLogger("github.com/coreos/etcd/pkg", "transport")
|
||||
|
||||
type LimitedConnListener struct {
|
||||
net.Listener
|
||||
RuntimeFDLimit uint64
|
||||
}
|
||||
|
||||
func (l *LimitedConnListener) Accept() (net.Conn, error) {
|
||||
conn, err := l.Listener.Accept()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
n, err := runtime.FDUsage()
|
||||
// Check whether fd number in use exceeds the set limit.
|
||||
if err == nil && n >= l.RuntimeFDLimit {
|
||||
conn.Close()
|
||||
plog.Errorf("accept error: closing connection, exceed file descriptor usage limitation (fd limit=%d)", l.RuntimeFDLimit)
|
||||
return nil, &acceptError{error: errors.New("exceed file descriptor usage limitation"), temporary: true}
|
||||
}
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
// acceptError decorates an underlying error with Timeout and Temporary
// methods; together with the embedded Error method this gives it the same
// method set as net.Error.
type acceptError struct {
	error
	// temporary is the value reported by Temporary.
	temporary bool
}

// Timeout always reports false.
func (ae *acceptError) Timeout() bool {
	return false
}

// Temporary reports the temporary flag the error was constructed with.
func (ae *acceptError) Temporary() bool {
	return ae.temporary
}
|
79
pkg/transport/limited_conn_listener_test.go
Normal file
79
pkg/transport/limited_conn_listener_test.go
Normal file
@ -0,0 +1,79 @@
|
||||
// Copyright 2015 CoreOS, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package transport
|
||||
|
||||
import (
|
||||
"net"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/coreos/etcd/pkg/runtime"
|
||||
)
|
||||
|
||||
func TestLimitedConnListenerAccept(t *testing.T) {
|
||||
if _, err := runtime.FDUsage(); err != nil {
|
||||
t.Skip("skip test due to unsupported runtime.FDUsage")
|
||||
}
|
||||
|
||||
ln, err := net.Listen("tcp", ":0")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
fdNum, err := runtime.FDUsage()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
srv := &httptest.Server{
|
||||
Listener: &LimitedConnListener{
|
||||
Listener: ln,
|
||||
RuntimeFDLimit: fdNum + 100,
|
||||
},
|
||||
Config: &http.Server{},
|
||||
}
|
||||
srv.Start()
|
||||
defer srv.Close()
|
||||
|
||||
resp, err := http.Get(srv.URL)
|
||||
defer resp.Body.Close()
|
||||
if err != nil {
|
||||
t.Fatalf("Get error = %v, want nil", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLimitedConnListenerLimit(t *testing.T) {
|
||||
if _, err := runtime.FDUsage(); err != nil {
|
||||
t.Skip("skip test due to unsupported runtime.FDUsage")
|
||||
}
|
||||
|
||||
ln, err := net.Listen("tcp", ":0")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
srv := &httptest.Server{
|
||||
Listener: &LimitedConnListener{
|
||||
Listener: ln,
|
||||
RuntimeFDLimit: 0,
|
||||
},
|
||||
Config: &http.Server{},
|
||||
}
|
||||
srv.Start()
|
||||
defer srv.Close()
|
||||
|
||||
_, err = http.Get(srv.URL)
|
||||
if err == nil {
|
||||
t.Fatalf("unexpected nil Get error")
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user