Merge pull request #11738 from gyuho/wal-metrics

wal: add "etcd_wal_writes_bytes_total"
This commit is contained in:
Gyuho Lee
2020-04-01 09:08:10 -07:00
committed by GitHub
6 changed files with 46 additions and 11 deletions

View File

@@ -76,6 +76,7 @@ Note that any `etcd_debugging_*` metrics are experimental and subject to change.
- Change [`etcd_cluster_version`](https://github.com/etcd-io/etcd/pull/11254) Prometheus metrics to include only major and minor version.
- Add [`etcd_debugging_mvcc_total_put_size_in_bytes`](https://github.com/etcd-io/etcd/pull/11374) Prometheus metric.
- Add [`etcd_server_client_requests_total` with `"type"` and `"client_api_version"` labels](https://github.com/etcd-io/etcd/pull/11687).
- Add [`etcd_wal_write_bytes_total`](https://github.com/etcd-io/etcd/pull/11738).
### etcd server
@@ -121,6 +122,10 @@ Note that any `etcd_debugging_*` metrics are experimental and subject to change.
- https://github.com/etcd-io/etcd/issues/11495
- https://github.com/etcd-io/etcd/issues/11730
### Package `wal`
- Add [`etcd_wal_write_bytes_total`](https://github.com/etcd-io/etcd/pull/11738).
### etcdctl v3
- Fix `etcdctl member add` command to prevent potential timeout. ([PR#11194](https://github.com/etcd-io/etcd/pull/11194) and [PR#11638](https://github.com/etcd-io/etcd/pull/11638))

View File

@@ -95,12 +95,23 @@ func (pw *PageWriter) Write(p []byte) (n int, err error) {
return n, werr
}
// Flush flushes buffered data.
func (pw *PageWriter) Flush() error {
if pw.bufferedBytes == 0 {
return nil
}
_, err := pw.w.Write(pw.buf[:pw.bufferedBytes])
pw.pageOffset = (pw.pageOffset + pw.bufferedBytes) % pw.pageBytes
pw.bufferedBytes = 0
_, err := pw.flush()
return err
}
// FlushN flushes buffered data and returns the number of written bytes.
func (pw *PageWriter) FlushN() (int, error) {
return pw.flush()
}
func (pw *PageWriter) flush() (int, error) {
if pw.bufferedBytes == 0 {
return 0, nil
}
n, err := pw.w.Write(pw.buf[:pw.bufferedBytes])
pw.pageOffset = (pw.pageOffset + pw.bufferedBytes) % pw.pageBytes
pw.bufferedBytes = 0
return n, err
}

View File

@@ -92,7 +92,8 @@ func (e *encoder) encode(rec *walpb.Record) error {
if padBytes != 0 {
data = append(data, make([]byte, padBytes)...)
}
_, err = e.bw.Write(data)
n, err = e.bw.Write(data)
walWriteBytes.Add(float64(n))
return err
}
@@ -108,13 +109,16 @@ func encodeFrameSize(dataBytes int) (lenField uint64, padBytes int) {
func (e *encoder) flush() error {
e.mu.Lock()
defer e.mu.Unlock()
return e.bw.Flush()
n, err := e.bw.FlushN()
e.mu.Unlock()
walWriteBytes.Add(float64(n))
return err
}
func writeUint64(w io.Writer, n uint64, buf []byte) error {
// http://golang.org/src/encoding/binary/binary.go
binary.LittleEndian.PutUint64(buf, n)
_, err := w.Write(buf)
nv, err := w.Write(buf)
walWriteBytes.Add(float64(nv))
return err
}

View File

@@ -27,8 +27,16 @@ var (
// highest bucket start of 0.001 sec * 2^13 == 8.192 sec
Buckets: prometheus.ExponentialBuckets(0.001, 2, 14),
})
walWriteBytes = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "disk",
Name: "wal_write_bytes_total",
Help: "Total number of bytes written in WAL.",
})
)
func init() {
prometheus.MustRegister(walFsyncSec)
prometheus.MustRegister(walWriteBytes)
}

View File

@@ -18,10 +18,10 @@ import (
"io"
"os"
"path/filepath"
"time"
"go.etcd.io/etcd/pkg/fileutil"
"go.etcd.io/etcd/wal/walpb"
"go.uber.org/zap"
)
@@ -86,10 +86,12 @@ func Repair(lg *zap.Logger, dirpath string) bool {
return false
}
start := time.Now()
if err = fileutil.Fsync(f.File); err != nil {
lg.Warn("failed to fsync", zap.String("path", f.Name()), zap.Error(err))
return false
}
walFsyncSec.Observe(time.Since(start).Seconds())
lg.Info("repaired", zap.String("path", f.Name()), zap.Error(io.ErrUnexpectedEOF))
return true

View File

@@ -193,6 +193,7 @@ func Create(lg *zap.Logger, dirpath string, metadata []byte) (*WAL, error) {
)
return nil, perr
}
start := time.Now()
if perr = fileutil.Fsync(pdir); perr != nil {
lg.Warn(
"failed to fsync the parent data directory file",
@@ -202,6 +203,8 @@ func Create(lg *zap.Logger, dirpath string, metadata []byte) (*WAL, error) {
)
return nil, perr
}
walFsyncSec.Observe(time.Since(start).Seconds())
if perr = pdir.Close(); perr != nil {
lg.Warn(
"failed to close the parent data directory file",
@@ -647,9 +650,11 @@ func (w *WAL) cut() error {
if err = os.Rename(newTail.Name(), fpath); err != nil {
return err
}
start := time.Now()
if err = fileutil.Fsync(w.dirFile); err != nil {
return err
}
walFsyncSec.Observe(time.Since(start).Seconds())
// reopen newTail with its new path so calls to Name() match the wal filename format
newTail.Close()