functional-tester: plog for milli-second timestamp

Standard log package by default only prints out the second-scale
so the 3rd party log feeder mixes the order of the events, which makes
the debugging hard. This replaces it with capnslog and make them consistent
with all other etcd log formats.
This commit is contained in:
Gyu-Ho Lee 2016-02-17 18:39:05 -08:00
parent 40d3e0daff
commit 7aa62ec595
6 changed files with 59 additions and 58 deletions

View File

@ -16,7 +16,6 @@ package main
import ( import (
"fmt" "fmt"
"log"
"net" "net"
"os" "os"
"os/exec" "os/exec"
@ -148,7 +147,7 @@ func (a *Agent) cleanup() error {
// https://github.com/torvalds/linux/blob/master/fs/drop_caches.c // https://github.com/torvalds/linux/blob/master/fs/drop_caches.c
cmd := exec.Command("/bin/sh", "-c", `echo "echo 1 > /proc/sys/vm/drop_caches" | sudo sh`) cmd := exec.Command("/bin/sh", "-c", `echo "echo 1 > /proc/sys/vm/drop_caches" | sudo sh`)
if err := cmd.Run(); err != nil { if err := cmd.Run(); err != nil {
log.Printf("error when cleaning page cache (%v)", err) plog.Printf("error when cleaning page cache (%v)", err)
} }
return nil return nil
} }

View File

@ -16,11 +16,14 @@ package main
import ( import (
"flag" "flag"
"log"
"os" "os"
"path/filepath" "path/filepath"
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/coreos/pkg/capnslog"
) )
var plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "etcd-agent")
func main() { func main() {
etcdPath := flag.String("etcd-path", filepath.Join(os.Getenv("GOPATH"), "bin/etcd"), "the path to etcd binary") etcdPath := flag.String("etcd-path", filepath.Join(os.Getenv("GOPATH"), "bin/etcd"), "the path to etcd binary")
etcdLogPath := flag.String("etcd-log-path", "etcd.log", "the path to etcd log") etcdLogPath := flag.String("etcd-log-path", "etcd.log", "the path to etcd log")
@ -29,7 +32,7 @@ func main() {
a, err := newAgent(*etcdPath, *etcdLogPath) a, err := newAgent(*etcdPath, *etcdLogPath)
if err != nil { if err != nil {
log.Fatal(err) plog.Fatal(err)
} }
a.serveRPC(*port) a.serveRPC(*port)

View File

@ -15,7 +15,6 @@
package main package main
import ( import (
"log"
"net" "net"
"net/http" "net/http"
"net/rpc" "net/rpc"
@ -28,17 +27,17 @@ func (a *Agent) serveRPC(port string) {
rpc.HandleHTTP() rpc.HandleHTTP()
l, e := net.Listen("tcp", port) l, e := net.Listen("tcp", port)
if e != nil { if e != nil {
log.Fatal("agent:", e) plog.Fatal(e)
} }
log.Println("agent listening on", port) plog.Println("agent listening on", port)
go http.Serve(l, nil) go http.Serve(l, nil)
} }
func (a *Agent) RPCStart(args []string, pid *int) error { func (a *Agent) RPCStart(args []string, pid *int) error {
log.Printf("rpc: start etcd with args %v", args) plog.Printf("rpc: start etcd with args %v", args)
err := a.start(args...) err := a.start(args...)
if err != nil { if err != nil {
log.Println("rpc: error starting etcd", err) plog.Println("rpc: error starting etcd", err)
return err return err
} }
*pid = a.cmd.Process.Pid *pid = a.cmd.Process.Pid
@ -46,20 +45,20 @@ func (a *Agent) RPCStart(args []string, pid *int) error {
} }
func (a *Agent) RPCStop(args struct{}, reply *struct{}) error { func (a *Agent) RPCStop(args struct{}, reply *struct{}) error {
log.Printf("rpc: stop etcd") plog.Printf("rpc: stop etcd")
err := a.stop() err := a.stop()
if err != nil { if err != nil {
log.Println("rpc: error stopping etcd", err) plog.Println("rpc: error stopping etcd", err)
return err return err
} }
return nil return nil
} }
func (a *Agent) RPCRestart(args struct{}, pid *int) error { func (a *Agent) RPCRestart(args struct{}, pid *int) error {
log.Printf("rpc: restart etcd") plog.Printf("rpc: restart etcd")
err := a.restart() err := a.restart()
if err != nil { if err != nil {
log.Println("rpc: error restarting etcd", err) plog.Println("rpc: error restarting etcd", err)
return err return err
} }
*pid = a.cmd.Process.Pid *pid = a.cmd.Process.Pid
@ -67,38 +66,38 @@ func (a *Agent) RPCRestart(args struct{}, pid *int) error {
} }
func (a *Agent) RPCCleanup(args struct{}, reply *struct{}) error { func (a *Agent) RPCCleanup(args struct{}, reply *struct{}) error {
log.Printf("rpc: cleanup etcd") plog.Printf("rpc: cleanup etcd")
err := a.cleanup() err := a.cleanup()
if err != nil { if err != nil {
log.Println("rpc: error cleaning up etcd", err) plog.Println("rpc: error cleaning up etcd", err)
return err return err
} }
return nil return nil
} }
func (a *Agent) RPCTerminate(args struct{}, reply *struct{}) error { func (a *Agent) RPCTerminate(args struct{}, reply *struct{}) error {
log.Printf("rpc: terminate etcd") plog.Printf("rpc: terminate etcd")
err := a.terminate() err := a.terminate()
if err != nil { if err != nil {
log.Println("rpc: error terminating etcd", err) plog.Println("rpc: error terminating etcd", err)
} }
return nil return nil
} }
func (a *Agent) RPCDropPort(port int, reply *struct{}) error { func (a *Agent) RPCDropPort(port int, reply *struct{}) error {
log.Printf("rpc: drop port %d", port) plog.Printf("rpc: drop port %d", port)
err := a.dropPort(port) err := a.dropPort(port)
if err != nil { if err != nil {
log.Println("rpc: error dropping port", err) plog.Println("rpc: error dropping port", err)
} }
return nil return nil
} }
func (a *Agent) RPCRecoverPort(port int, reply *struct{}) error { func (a *Agent) RPCRecoverPort(port int, reply *struct{}) error {
log.Printf("rpc: recover port %d", port) plog.Printf("rpc: recover port %d", port)
err := a.recoverPort(port) err := a.recoverPort(port)
if err != nil { if err != nil {
log.Println("rpc: error recovering port", err) plog.Println("rpc: error recovering port", err)
} }
return nil return nil
} }

View File

@ -16,7 +16,6 @@ package main
import ( import (
"fmt" "fmt"
"log"
"math/rand" "math/rand"
"net" "net"
"strings" "strings"
@ -252,7 +251,7 @@ func (c *cluster) Status() ClusterStatus {
desc := c.agentEndpoints[i] desc := c.agentEndpoints[i]
if err != nil { if err != nil {
cs.AgentStatuses[desc] = client.Status{State: "unknown"} cs.AgentStatuses[desc] = client.Status{State: "unknown"}
log.Printf("etcd-tester: failed to get the status of agent [%s]", desc) plog.Printf("etcd-tester: failed to get the status of agent [%s]", desc)
} }
cs.AgentStatuses[desc] = s cs.AgentStatuses[desc] = s
} }

View File

@ -16,13 +16,15 @@ package main
import ( import (
"flag" "flag"
"log"
"net/http" "net/http"
"strings" "strings"
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/coreos/pkg/capnslog"
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus" "github.com/coreos/etcd/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus"
) )
var plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "etcd-tester")
func main() { func main() {
endpointStr := flag.String("agent-endpoints", "localhost:9027", "HTTP RPC endpoints of agents. Do not specify the schema.") endpointStr := flag.String("agent-endpoints", "localhost:9027", "HTTP RPC endpoints of agents. Do not specify the schema.")
datadir := flag.String("data-dir", "agent.etcd", "etcd data directory location on agent machine.") datadir := flag.String("data-dir", "agent.etcd", "etcd data directory location on agent machine.")
@ -35,7 +37,7 @@ func main() {
endpoints := strings.Split(*endpointStr, ",") endpoints := strings.Split(*endpointStr, ",")
c, err := newCluster(endpoints, *datadir, *stressKeySize, *stressKeySuffixRange, *isV2Only) c, err := newCluster(endpoints, *datadir, *stressKeySize, *stressKeySuffixRange, *isV2Only)
if err != nil { if err != nil {
log.Fatal(err) plog.Fatal(err)
} }
defer c.Terminate() defer c.Terminate()
@ -57,7 +59,7 @@ func main() {
sh := statusHandler{status: &t.status} sh := statusHandler{status: &t.status}
http.Handle("/status", sh) http.Handle("/status", sh)
http.Handle("/metrics", prometheus.Handler()) http.Handle("/metrics", prometheus.Handler())
go func() { log.Fatal(http.ListenAndServe(":9028", nil)) }() go func() { plog.Fatal(http.ListenAndServe(":9028", nil)) }()
t.runLoop() t.runLoop()
} }

View File

@ -15,7 +15,6 @@
package main package main
import ( import (
"log"
"sync" "sync"
"time" "time"
) )
@ -46,49 +45,49 @@ func (tt *tester) runLoop() {
tt.status.setCase(j) tt.status.setCase(j)
if err := tt.cluster.WaitHealth(); err != nil { if err := tt.cluster.WaitHealth(); err != nil {
log.Printf("etcd-tester: [round#%d case#%d] wait full health error: %v", i, j, err) plog.Printf("etcd-tester: [round#%d case#%d] wait full health error: %v", i, j, err)
if err := tt.cleanup(i, j); err != nil { if err := tt.cleanup(i, j); err != nil {
log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err) plog.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
return return
} }
continue continue
} }
log.Printf("etcd-tester: [round#%d case#%d] start failure %s", i, j, f.Desc()) plog.Printf("etcd-tester: [round#%d case#%d] start failure %s", i, j, f.Desc())
log.Printf("etcd-tester: [round#%d case#%d] start injecting failure...", i, j) plog.Printf("etcd-tester: [round#%d case#%d] start injecting failure...", i, j)
if err := f.Inject(tt.cluster, i); err != nil { if err := f.Inject(tt.cluster, i); err != nil {
log.Printf("etcd-tester: [round#%d case#%d] injection error: %v", i, j, err) plog.Printf("etcd-tester: [round#%d case#%d] injection error: %v", i, j, err)
if err := tt.cleanup(i, j); err != nil { if err := tt.cleanup(i, j); err != nil {
log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err) plog.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
return return
} }
continue continue
} }
log.Printf("etcd-tester: [round#%d case#%d] injected failure", i, j) plog.Printf("etcd-tester: [round#%d case#%d] injected failure", i, j)
log.Printf("etcd-tester: [round#%d case#%d] start recovering failure...", i, j) plog.Printf("etcd-tester: [round#%d case#%d] start recovering failure...", i, j)
if err := f.Recover(tt.cluster, i); err != nil { if err := f.Recover(tt.cluster, i); err != nil {
log.Printf("etcd-tester: [round#%d case#%d] recovery error: %v", i, j, err) plog.Printf("etcd-tester: [round#%d case#%d] recovery error: %v", i, j, err)
if err := tt.cleanup(i, j); err != nil { if err := tt.cleanup(i, j); err != nil {
log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err) plog.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
return return
} }
continue continue
} }
log.Printf("etcd-tester: [round#%d case#%d] recovered failure", i, j) plog.Printf("etcd-tester: [round#%d case#%d] recovered failure", i, j)
if tt.cluster.v2Only { if tt.cluster.v2Only {
log.Printf("etcd-tester: [round#%d case#%d] succeed!", i, j) plog.Printf("etcd-tester: [round#%d case#%d] succeed!", i, j)
continue continue
} }
log.Printf("etcd-tester: [round#%d case#%d] canceling the stressers...", i, j) plog.Printf("etcd-tester: [round#%d case#%d] canceling the stressers...", i, j)
for _, s := range tt.cluster.Stressers { for _, s := range tt.cluster.Stressers {
s.Cancel() s.Cancel()
} }
log.Printf("etcd-tester: [round#%d case#%d] canceled stressers", i, j) plog.Printf("etcd-tester: [round#%d case#%d] canceled stressers", i, j)
log.Printf("etcd-tester: [round#%d case#%d] checking current revisions...", i, j) plog.Printf("etcd-tester: [round#%d case#%d] checking current revisions...", i, j)
var ( var (
revs map[string]int64 revs map[string]int64
hashes map[string]int64 hashes map[string]int64
@ -100,55 +99,55 @@ func (tt *tester) runLoop() {
revs, hashes, rerr = tt.cluster.getRevisionHash() revs, hashes, rerr = tt.cluster.getRevisionHash()
if rerr != nil { if rerr != nil {
log.Printf("etcd-tester: [round#%d case#%d.%d] failed to get current revisions (%v)", i, j, k, rerr) plog.Printf("etcd-tester: [round#%d case#%d.%d] failed to get current revisions (%v)", i, j, k, rerr)
continue continue
} }
if currentRevision, ok = getSameValue(revs); ok { if currentRevision, ok = getSameValue(revs); ok {
break break
} }
log.Printf("etcd-tester: [round#%d case#%d.%d] inconsistent current revisions %+v", i, j, k, revs) plog.Printf("etcd-tester: [round#%d case#%d.%d] inconsistent current revisions %+v", i, j, k, revs)
} }
if !ok || rerr != nil { if !ok || rerr != nil {
log.Printf("etcd-tester: [round#%d case#%d] checking current revisions failed (%v)", i, j, revs) plog.Printf("etcd-tester: [round#%d case#%d] checking current revisions failed (%v)", i, j, revs)
if err := tt.cleanup(i, j); err != nil { if err := tt.cleanup(i, j); err != nil {
log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err) plog.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
return return
} }
continue continue
} }
log.Printf("etcd-tester: [round#%d case#%d] all members are consistent with current revisions", i, j) plog.Printf("etcd-tester: [round#%d case#%d] all members are consistent with current revisions", i, j)
log.Printf("etcd-tester: [round#%d case#%d] checking current storage hashes...", i, j) plog.Printf("etcd-tester: [round#%d case#%d] checking current storage hashes...", i, j)
if _, ok = getSameValue(hashes); !ok { if _, ok = getSameValue(hashes); !ok {
log.Printf("etcd-tester: [round#%d case#%d] checking current storage hashes failed (%v)", i, j, hashes) plog.Printf("etcd-tester: [round#%d case#%d] checking current storage hashes failed (%v)", i, j, hashes)
if err := tt.cleanup(i, j); err != nil { if err := tt.cleanup(i, j); err != nil {
log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err) plog.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
return return
} }
continue continue
} }
log.Printf("etcd-tester: [round#%d case#%d] all members are consistent with storage hashes", i, j) plog.Printf("etcd-tester: [round#%d case#%d] all members are consistent with storage hashes", i, j)
log.Printf("etcd-tester: [round#%d case#%d] restarting the stressers...", i, j) plog.Printf("etcd-tester: [round#%d case#%d] restarting the stressers...", i, j)
for _, s := range tt.cluster.Stressers { for _, s := range tt.cluster.Stressers {
go s.Stress() go s.Stress()
} }
log.Printf("etcd-tester: [round#%d case#%d] succeed!", i, j) plog.Printf("etcd-tester: [round#%d case#%d] succeed!", i, j)
} }
revToCompact := max(0, currentRevision-10000) revToCompact := max(0, currentRevision-10000)
log.Printf("etcd-tester: [round#%d] compacting storage at %d (current revision %d)", i, revToCompact, currentRevision) plog.Printf("etcd-tester: [round#%d] compacting storage at %d (current revision %d)", i, revToCompact, currentRevision)
if err := tt.cluster.compactKV(revToCompact); err != nil { if err := tt.cluster.compactKV(revToCompact); err != nil {
log.Printf("etcd-tester: [round#%d] compactKV error (%v)", i, err) plog.Printf("etcd-tester: [round#%d] compactKV error (%v)", i, err)
if err := tt.cleanup(i, 0); err != nil { if err := tt.cleanup(i, 0); err != nil {
log.Printf("etcd-tester: [round#%d] cleanup error: %v", i, err) plog.Printf("etcd-tester: [round#%d] cleanup error: %v", i, err)
return return
} }
continue continue
} }
log.Printf("etcd-tester: [round#%d] compacted storage", i) plog.Printf("etcd-tester: [round#%d] compacted storage", i)
// TODO: make sure compaction is finished // TODO: make sure compaction is finished
time.Sleep(30 * time.Second) time.Sleep(30 * time.Second)
@ -159,7 +158,7 @@ func (tt *tester) cleanup(i, j int) error {
roundFailedTotalCounter.Inc() roundFailedTotalCounter.Inc()
caseFailedTotalCounter.WithLabelValues(tt.failures[j].Desc()).Inc() caseFailedTotalCounter.WithLabelValues(tt.failures[j].Desc()).Inc()
log.Printf("etcd-tester: [round#%d case#%d] cleaning up...", i, j) plog.Printf("etcd-tester: [round#%d case#%d] cleaning up...", i, j)
if err := tt.cluster.Cleanup(); err != nil { if err := tt.cluster.Cleanup(); err != nil {
return err return err
} }