functional-tester: plog for milli-second timestamp

Standard log package by default only prints out the second-scale
so the 3rd party log feeder mixes the order of the events, which makes
the debugging hard. This replaces it with capnslog and make them consistent
with all other etcd log formats.
This commit is contained in:
Gyu-Ho Lee 2016-02-17 18:39:05 -08:00
parent 40d3e0daff
commit 7aa62ec595
6 changed files with 59 additions and 58 deletions

View File

@ -16,7 +16,6 @@ package main
import (
"fmt"
"log"
"net"
"os"
"os/exec"
@ -148,7 +147,7 @@ func (a *Agent) cleanup() error {
// https://github.com/torvalds/linux/blob/master/fs/drop_caches.c
cmd := exec.Command("/bin/sh", "-c", `echo "echo 1 > /proc/sys/vm/drop_caches" | sudo sh`)
if err := cmd.Run(); err != nil {
log.Printf("error when cleaning page cache (%v)", err)
plog.Printf("error when cleaning page cache (%v)", err)
}
return nil
}

View File

@ -16,11 +16,14 @@ package main
import (
"flag"
"log"
"os"
"path/filepath"
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/coreos/pkg/capnslog"
)
var plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "etcd-agent")
func main() {
etcdPath := flag.String("etcd-path", filepath.Join(os.Getenv("GOPATH"), "bin/etcd"), "the path to etcd binary")
etcdLogPath := flag.String("etcd-log-path", "etcd.log", "the path to etcd log")
@ -29,7 +32,7 @@ func main() {
a, err := newAgent(*etcdPath, *etcdLogPath)
if err != nil {
log.Fatal(err)
plog.Fatal(err)
}
a.serveRPC(*port)

View File

@ -15,7 +15,6 @@
package main
import (
"log"
"net"
"net/http"
"net/rpc"
@ -28,17 +27,17 @@ func (a *Agent) serveRPC(port string) {
rpc.HandleHTTP()
l, e := net.Listen("tcp", port)
if e != nil {
log.Fatal("agent:", e)
plog.Fatal(e)
}
log.Println("agent listening on", port)
plog.Println("agent listening on", port)
go http.Serve(l, nil)
}
func (a *Agent) RPCStart(args []string, pid *int) error {
log.Printf("rpc: start etcd with args %v", args)
plog.Printf("rpc: start etcd with args %v", args)
err := a.start(args...)
if err != nil {
log.Println("rpc: error starting etcd", err)
plog.Println("rpc: error starting etcd", err)
return err
}
*pid = a.cmd.Process.Pid
@ -46,20 +45,20 @@ func (a *Agent) RPCStart(args []string, pid *int) error {
}
func (a *Agent) RPCStop(args struct{}, reply *struct{}) error {
log.Printf("rpc: stop etcd")
plog.Printf("rpc: stop etcd")
err := a.stop()
if err != nil {
log.Println("rpc: error stopping etcd", err)
plog.Println("rpc: error stopping etcd", err)
return err
}
return nil
}
func (a *Agent) RPCRestart(args struct{}, pid *int) error {
log.Printf("rpc: restart etcd")
plog.Printf("rpc: restart etcd")
err := a.restart()
if err != nil {
log.Println("rpc: error restarting etcd", err)
plog.Println("rpc: error restarting etcd", err)
return err
}
*pid = a.cmd.Process.Pid
@ -67,38 +66,38 @@ func (a *Agent) RPCRestart(args struct{}, pid *int) error {
}
func (a *Agent) RPCCleanup(args struct{}, reply *struct{}) error {
log.Printf("rpc: cleanup etcd")
plog.Printf("rpc: cleanup etcd")
err := a.cleanup()
if err != nil {
log.Println("rpc: error cleaning up etcd", err)
plog.Println("rpc: error cleaning up etcd", err)
return err
}
return nil
}
func (a *Agent) RPCTerminate(args struct{}, reply *struct{}) error {
log.Printf("rpc: terminate etcd")
plog.Printf("rpc: terminate etcd")
err := a.terminate()
if err != nil {
log.Println("rpc: error terminating etcd", err)
plog.Println("rpc: error terminating etcd", err)
}
return nil
}
func (a *Agent) RPCDropPort(port int, reply *struct{}) error {
log.Printf("rpc: drop port %d", port)
plog.Printf("rpc: drop port %d", port)
err := a.dropPort(port)
if err != nil {
log.Println("rpc: error dropping port", err)
plog.Println("rpc: error dropping port", err)
}
return nil
}
func (a *Agent) RPCRecoverPort(port int, reply *struct{}) error {
log.Printf("rpc: recover port %d", port)
plog.Printf("rpc: recover port %d", port)
err := a.recoverPort(port)
if err != nil {
log.Println("rpc: error recovering port", err)
plog.Println("rpc: error recovering port", err)
}
return nil
}

View File

@ -16,7 +16,6 @@ package main
import (
"fmt"
"log"
"math/rand"
"net"
"strings"
@ -252,7 +251,7 @@ func (c *cluster) Status() ClusterStatus {
desc := c.agentEndpoints[i]
if err != nil {
cs.AgentStatuses[desc] = client.Status{State: "unknown"}
log.Printf("etcd-tester: failed to get the status of agent [%s]", desc)
plog.Printf("etcd-tester: failed to get the status of agent [%s]", desc)
}
cs.AgentStatuses[desc] = s
}

View File

@ -16,13 +16,15 @@ package main
import (
"flag"
"log"
"net/http"
"strings"
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/coreos/pkg/capnslog"
"github.com/coreos/etcd/Godeps/_workspace/src/github.com/prometheus/client_golang/prometheus"
)
var plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "etcd-tester")
func main() {
endpointStr := flag.String("agent-endpoints", "localhost:9027", "HTTP RPC endpoints of agents. Do not specify the schema.")
datadir := flag.String("data-dir", "agent.etcd", "etcd data directory location on agent machine.")
@ -35,7 +37,7 @@ func main() {
endpoints := strings.Split(*endpointStr, ",")
c, err := newCluster(endpoints, *datadir, *stressKeySize, *stressKeySuffixRange, *isV2Only)
if err != nil {
log.Fatal(err)
plog.Fatal(err)
}
defer c.Terminate()
@ -57,7 +59,7 @@ func main() {
sh := statusHandler{status: &t.status}
http.Handle("/status", sh)
http.Handle("/metrics", prometheus.Handler())
go func() { log.Fatal(http.ListenAndServe(":9028", nil)) }()
go func() { plog.Fatal(http.ListenAndServe(":9028", nil)) }()
t.runLoop()
}

View File

@ -15,7 +15,6 @@
package main
import (
"log"
"sync"
"time"
)
@ -46,49 +45,49 @@ func (tt *tester) runLoop() {
tt.status.setCase(j)
if err := tt.cluster.WaitHealth(); err != nil {
log.Printf("etcd-tester: [round#%d case#%d] wait full health error: %v", i, j, err)
plog.Printf("etcd-tester: [round#%d case#%d] wait full health error: %v", i, j, err)
if err := tt.cleanup(i, j); err != nil {
log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
plog.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
return
}
continue
}
log.Printf("etcd-tester: [round#%d case#%d] start failure %s", i, j, f.Desc())
plog.Printf("etcd-tester: [round#%d case#%d] start failure %s", i, j, f.Desc())
log.Printf("etcd-tester: [round#%d case#%d] start injecting failure...", i, j)
plog.Printf("etcd-tester: [round#%d case#%d] start injecting failure...", i, j)
if err := f.Inject(tt.cluster, i); err != nil {
log.Printf("etcd-tester: [round#%d case#%d] injection error: %v", i, j, err)
plog.Printf("etcd-tester: [round#%d case#%d] injection error: %v", i, j, err)
if err := tt.cleanup(i, j); err != nil {
log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
plog.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
return
}
continue
}
log.Printf("etcd-tester: [round#%d case#%d] injected failure", i, j)
plog.Printf("etcd-tester: [round#%d case#%d] injected failure", i, j)
log.Printf("etcd-tester: [round#%d case#%d] start recovering failure...", i, j)
plog.Printf("etcd-tester: [round#%d case#%d] start recovering failure...", i, j)
if err := f.Recover(tt.cluster, i); err != nil {
log.Printf("etcd-tester: [round#%d case#%d] recovery error: %v", i, j, err)
plog.Printf("etcd-tester: [round#%d case#%d] recovery error: %v", i, j, err)
if err := tt.cleanup(i, j); err != nil {
log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
plog.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
return
}
continue
}
log.Printf("etcd-tester: [round#%d case#%d] recovered failure", i, j)
plog.Printf("etcd-tester: [round#%d case#%d] recovered failure", i, j)
if tt.cluster.v2Only {
log.Printf("etcd-tester: [round#%d case#%d] succeed!", i, j)
plog.Printf("etcd-tester: [round#%d case#%d] succeed!", i, j)
continue
}
log.Printf("etcd-tester: [round#%d case#%d] canceling the stressers...", i, j)
plog.Printf("etcd-tester: [round#%d case#%d] canceling the stressers...", i, j)
for _, s := range tt.cluster.Stressers {
s.Cancel()
}
log.Printf("etcd-tester: [round#%d case#%d] canceled stressers", i, j)
plog.Printf("etcd-tester: [round#%d case#%d] canceled stressers", i, j)
log.Printf("etcd-tester: [round#%d case#%d] checking current revisions...", i, j)
plog.Printf("etcd-tester: [round#%d case#%d] checking current revisions...", i, j)
var (
revs map[string]int64
hashes map[string]int64
@ -100,55 +99,55 @@ func (tt *tester) runLoop() {
revs, hashes, rerr = tt.cluster.getRevisionHash()
if rerr != nil {
log.Printf("etcd-tester: [round#%d case#%d.%d] failed to get current revisions (%v)", i, j, k, rerr)
plog.Printf("etcd-tester: [round#%d case#%d.%d] failed to get current revisions (%v)", i, j, k, rerr)
continue
}
if currentRevision, ok = getSameValue(revs); ok {
break
}
log.Printf("etcd-tester: [round#%d case#%d.%d] inconsistent current revisions %+v", i, j, k, revs)
plog.Printf("etcd-tester: [round#%d case#%d.%d] inconsistent current revisions %+v", i, j, k, revs)
}
if !ok || rerr != nil {
log.Printf("etcd-tester: [round#%d case#%d] checking current revisions failed (%v)", i, j, revs)
plog.Printf("etcd-tester: [round#%d case#%d] checking current revisions failed (%v)", i, j, revs)
if err := tt.cleanup(i, j); err != nil {
log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
plog.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
return
}
continue
}
log.Printf("etcd-tester: [round#%d case#%d] all members are consistent with current revisions", i, j)
plog.Printf("etcd-tester: [round#%d case#%d] all members are consistent with current revisions", i, j)
log.Printf("etcd-tester: [round#%d case#%d] checking current storage hashes...", i, j)
plog.Printf("etcd-tester: [round#%d case#%d] checking current storage hashes...", i, j)
if _, ok = getSameValue(hashes); !ok {
log.Printf("etcd-tester: [round#%d case#%d] checking current storage hashes failed (%v)", i, j, hashes)
plog.Printf("etcd-tester: [round#%d case#%d] checking current storage hashes failed (%v)", i, j, hashes)
if err := tt.cleanup(i, j); err != nil {
log.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
plog.Printf("etcd-tester: [round#%d case#%d] cleanup error: %v", i, j, err)
return
}
continue
}
log.Printf("etcd-tester: [round#%d case#%d] all members are consistent with storage hashes", i, j)
plog.Printf("etcd-tester: [round#%d case#%d] all members are consistent with storage hashes", i, j)
log.Printf("etcd-tester: [round#%d case#%d] restarting the stressers...", i, j)
plog.Printf("etcd-tester: [round#%d case#%d] restarting the stressers...", i, j)
for _, s := range tt.cluster.Stressers {
go s.Stress()
}
log.Printf("etcd-tester: [round#%d case#%d] succeed!", i, j)
plog.Printf("etcd-tester: [round#%d case#%d] succeed!", i, j)
}
revToCompact := max(0, currentRevision-10000)
log.Printf("etcd-tester: [round#%d] compacting storage at %d (current revision %d)", i, revToCompact, currentRevision)
plog.Printf("etcd-tester: [round#%d] compacting storage at %d (current revision %d)", i, revToCompact, currentRevision)
if err := tt.cluster.compactKV(revToCompact); err != nil {
log.Printf("etcd-tester: [round#%d] compactKV error (%v)", i, err)
plog.Printf("etcd-tester: [round#%d] compactKV error (%v)", i, err)
if err := tt.cleanup(i, 0); err != nil {
log.Printf("etcd-tester: [round#%d] cleanup error: %v", i, err)
plog.Printf("etcd-tester: [round#%d] cleanup error: %v", i, err)
return
}
continue
}
log.Printf("etcd-tester: [round#%d] compacted storage", i)
plog.Printf("etcd-tester: [round#%d] compacted storage", i)
// TODO: make sure compaction is finished
time.Sleep(30 * time.Second)
@ -159,7 +158,7 @@ func (tt *tester) cleanup(i, j int) error {
roundFailedTotalCounter.Inc()
caseFailedTotalCounter.WithLabelValues(tt.failures[j].Desc()).Inc()
log.Printf("etcd-tester: [round#%d case#%d] cleaning up...", i, j)
plog.Printf("etcd-tester: [round#%d case#%d] cleaning up...", i, j)
if err := tt.cluster.Cleanup(); err != nil {
return err
}