mirror of
https://github.com/etcd-io/etcd.git
synced 2024-09-27 06:25:44 +00:00
Merge pull request #5237 from brian-brazil/master
Improve some debug metrics.
This commit is contained in:
commit
b58f8dd64b
@ -79,16 +79,13 @@ The metrics under the `etcd_debugging` prefix are for debugging. They are very i
|
||||
|
||||
| Name | Description | Type |
|
||||
|-----------------------------------------|--------------------------------------------------|-----------|
|
||||
| file_descriptors_used_total | The total number of file descriptors used | Gauge |
|
||||
| proposal_durations_seconds | The latency distributions of committing proposal | Histogram |
|
||||
| pending_proposal_total | The total number of pending proposals | Gauge |
|
||||
| proposals_pending | The current number of pending proposals | Gauge |
|
||||
| proposal_failed_total | The total number of failed proposals | Counter |
|
||||
|
||||
Heavy file descriptor (`file_descriptors_used_total`) usage (i.e., near the process's file descriptor limit) indicates a potential file descriptor exhaustion issue. If the file descriptors are exhausted, etcd may panic because it cannot create new WAL files.
|
||||
|
||||
[Proposal][glossary-proposal] durations (`proposal_durations_seconds`) provide a proposal commit latency histogram. The reported latency reflects network and disk IO delays in etcd.
|
||||
|
||||
Pending proposal (`pending_proposal_total`) indicates how many proposals are queued for commit. A rising pending proposal total suggests there is a high client load or the cluster is unstable.
|
||||
Proposals pending (`proposals_pending`) indicates how many proposals are queued for commit. A rising number of pending proposals suggests there is a high client load or the cluster is unstable.
|
||||
|
||||
Failed proposals (`proposal_failed_total`) are normally related to two issues: temporary failures related to a leader election or longer duration downtime caused by a loss of quorum in the cluster.
|
||||
|
||||
@ -127,6 +124,17 @@ Label `msgType` is the type of raft message. `MsgApp` is log replication message
|
||||
|
||||
Label `remoteID` is the member ID of the message destination.
|
||||
|
||||
## Prometheus supplied metrics
|
||||
|
||||
The Prometheus client library provides a number of metrics under the `go` and `process` namespaces. There are a few that are particularly interesting.
|
||||
|
||||
| Name | Description | Type |
|
||||
|-----------------------------------|--------------------------------------------|--------------|
|
||||
| process_open_fds | Number of open file descriptors. | Gauge |
|
||||
| process_max_fds | Maximum number of open file descriptors. | Gauge |
|
||||
|
||||
Heavy file descriptor (`process_open_fds`) usage (i.e., near the process's file descriptor limit, `process_max_fds`) indicates a potential file descriptor exhaustion issue. If the file descriptors are exhausted, etcd may panic because it cannot create new WAL files.
|
||||
|
||||
[glossary-proposal]: glossary.md#proposal
|
||||
[prometheus]: http://prometheus.io/
|
||||
[prometheus-getting-started]: http://prometheus.io/docs/introduction/getting_started/
|
||||
|
@ -33,8 +33,8 @@ var (
|
||||
proposePending = prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Namespace: "etcd_debugging",
|
||||
Subsystem: "server",
|
||||
Name: "pending_proposal_total",
|
||||
Help: "The total number of pending proposals.",
|
||||
Name: "proposals_pending",
|
||||
Help: "The current number of pending proposals.",
|
||||
})
|
||||
// This is the number of proposals that failed from the client's point of view.
|
||||
// The proposal might still get committed in raft later.
|
||||
@ -44,20 +44,12 @@ var (
|
||||
Name: "proposal_failed_total",
|
||||
Help: "The total number of failed proposals.",
|
||||
})
|
||||
|
||||
fileDescriptorUsed = prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Namespace: "etcd_debugging",
|
||||
Subsystem: "server",
|
||||
Name: "file_descriptors_used_total",
|
||||
Help: "The total number of file descriptors used.",
|
||||
})
|
||||
)
|
||||
|
||||
func init() {
|
||||
prometheus.MustRegister(proposeDurations)
|
||||
prometheus.MustRegister(proposePending)
|
||||
prometheus.MustRegister(proposeFailed)
|
||||
prometheus.MustRegister(fileDescriptorUsed)
|
||||
}
|
||||
|
||||
func monitorFileDescriptor(done <-chan struct{}) {
|
||||
@ -69,7 +61,6 @@ func monitorFileDescriptor(done <-chan struct{}) {
|
||||
plog.Errorf("cannot monitor file descriptor usage (%v)", err)
|
||||
return
|
||||
}
|
||||
fileDescriptorUsed.Set(float64(used))
|
||||
limit, err := runtime.FDLimit()
|
||||
if err != nil {
|
||||
plog.Errorf("cannot monitor file descriptor usage (%v)", err)
|
||||
|
Loading…
x
Reference in New Issue
Block a user