Merge pull request #12214 from gyuho/fd

*: optimize runtime.FDUsage + add OS level FD metrics
This commit is contained in:
Gyuho Lee 2020-08-12 18:37:05 -07:00 committed by GitHub
commit 93cf449205
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 37 additions and 5 deletions

View File

@ -91,6 +91,7 @@ Note that any `etcd_debugging_*` metrics are experimental and subject to change.
- Add [`etcd_server_client_requests_total` with `"type"` and `"client_api_version"` labels](https://github.com/etcd-io/etcd/pull/11687).
- Add [`etcd_wal_write_bytes_total`](https://github.com/etcd-io/etcd/pull/11738).
- Add [`etcd_debugging_auth_revision`](https://github.com/etcd-io/etcd/commit/f14d2a087f7b0fd6f7980b95b5e0b945109c95f3).
- Add [`os_fd_used` and `os_fd_limit` to monitor current OS file descriptors](https://github.com/etcd-io/etcd/pull/12214).
### etcd server
@ -130,12 +131,16 @@ Note that any `etcd_debugging_*` metrics are experimental and subject to change.
- Add [`--unsafe-no-fsync`](https://github.com/etcd-io/etcd/pull/11946) flag.
- Setting the flag disables all uses of fsync, which is unsafe and will cause data loss. This flag makes it possible to run an etcd node for testing and development without placing lots of load on the file system.
- Add [etcd --auth-token-ttl](https://github.com/etcd-io/etcd/pull/11980) flag to customize `simpleTokenTTL` settings.
- Improve [runtime.FDUsage objects malloc of Memory Usage and CPU Usage](https://github.com/etcd-io/etcd/pull/11986).
- Improve [`runtime.FDUsage` call pattern to reduce objects malloc of Memory Usage and CPU Usage](https://github.com/etcd-io/etcd/pull/11986).
- Improve [mvcc.watchResponse channel Memory Usage](https://github.com/etcd-io/etcd/pull/11987).
- Log [expensive request info in UnaryInterceptor](https://github.com/etcd-io/etcd/pull/12086).
- [Fix invalid Go type in etcdserverpb](https://github.com/etcd-io/etcd/pull/12000).
- [Improve healthcheck by using v3 range request and its corresponding timeout](https://github.com/etcd-io/etcd/pull/12195).
### Package `runtime`
- Optimize [`runtime.FDUsage` by removing unnecessary sorting](https://github.com/etcd-io/etcd/pull/12214).
### Package `embed`
- Remove [`embed.Config.Debug`](https://github.com/etcd-io/etcd/pull/10947).

View File

@ -151,6 +151,19 @@ var (
Help: "Server or member ID in hexadecimal format. 1 for 'server_id' label with current ID.",
},
[]string{"server_id"})
fdUsed = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "os",
Subsystem: "fd",
Name: "used",
Help: "The number of used file descriptors.",
})
fdLimit = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "os",
Subsystem: "fd",
Name: "limit",
Help: "The file descriptor limit.",
})
)
func init() {
@ -174,6 +187,8 @@ func init() {
prometheus.MustRegister(isLearner)
prometheus.MustRegister(learnerPromoteSucceed)
prometheus.MustRegister(learnerPromoteFailed)
prometheus.MustRegister(fdUsed)
prometheus.MustRegister(fdLimit)
currentVersion.With(prometheus.Labels{
"server_version": version.Version,
@ -184,7 +199,6 @@ func init() {
}
func monitorFileDescriptor(lg *zap.Logger, done <-chan struct{}) {
// This ticker will check File Descriptor Requirements ,and count all fds in used.
// And recorded some logs when in used >= limit/5*4. Just recorded message.
// If fds was more than 10K,It's low performance due to FDUsage() works.
@ -198,11 +212,13 @@ func monitorFileDescriptor(lg *zap.Logger, done <-chan struct{}) {
lg.Warn("failed to get file descriptor usage", zap.Error(err))
return
}
fdUsed.Set(float64(used))
limit, err := runtime.FDLimit()
if err != nil {
lg.Warn("failed to get file descriptor limit", zap.Error(err))
return
}
fdLimit.Set(float64(limit))
if used >= limit/5*4 {
lg.Warn("80% of file descriptors are used", zap.Uint64("used", used), zap.Uint64("limit", limit))
}

View File

@ -16,7 +16,7 @@
package runtime
import (
"io/ioutil"
"os"
"syscall"
)
@ -29,9 +29,20 @@ func FDLimit() (uint64, error) {
}
func FDUsage() (uint64, error) {
fds, err := ioutil.ReadDir("/proc/self/fd")
return countFiles("/proc/self/fd")
}
// countFiles reads the directory named by dirname and returns the count.
// This is same as stdlib "io/ioutil.ReadDir" but without sorting.
func countFiles(dirname string) (uint64, error) {
f, err := os.Open(dirname)
if err != nil {
return 0, err
}
return uint64(len(fds)), nil
list, err := f.Readdir(-1)
f.Close()
if err != nil {
return 0, err
}
return uint64(len(list)), nil
}