From 79b2777482e9b718a856aa734f9a00e7cdd3e9cc Mon Sep 17 00:00:00 2001
From: Wilson Wang <wilson.wang@bytedance.com>
Date: Sat, 22 May 2021 13:56:02 -0700
Subject: [PATCH] tools: add mixed read-write performance evaluation scripts

---
 tools/benchmark/cmd/txn_mixed.go  | 152 +++++++++++++++++++++++
 tools/rw-heatmaps/README.md       |  26 ++++
 tools/rw-heatmaps/plot_data.py    | 143 ++++++++++++++++++++++
 tools/rw-heatmaps/rw-benchmark.sh | 195 ++++++++++++++++++++++++++++++
 4 files changed, 516 insertions(+)
 create mode 100644 tools/benchmark/cmd/txn_mixed.go
 create mode 100644 tools/rw-heatmaps/README.md
 create mode 100755 tools/rw-heatmaps/plot_data.py
 create mode 100755 tools/rw-heatmaps/rw-benchmark.sh

diff --git a/tools/benchmark/cmd/txn_mixed.go b/tools/benchmark/cmd/txn_mixed.go
new file mode 100644
index 000000000..6ec1bb96d
--- /dev/null
+++ b/tools/benchmark/cmd/txn_mixed.go
@@ -0,0 +1,152 @@
+// Copyright 2021 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+	"context"
+	"encoding/binary"
+	"fmt"
+	"math"
+	"math/rand"
+	"os"
+	"time"
+
+	v3 "go.etcd.io/etcd/client/v3"
+	"go.etcd.io/etcd/pkg/v3/report"
+
+	"github.com/spf13/cobra"
+	"golang.org/x/time/rate"
+	"gopkg.in/cheggaaa/pb.v1"
+)
+
+// mixedTxnCmd represents the txn-mixed command.
+var mixedTxnCmd = &cobra.Command{
+	Use:   "txn-mixed key [end-range]",
+	Short: "Benchmark a mixed load of txn-put & txn-range.",
+
+	Run: mixedTxnFunc,
+}
+
+var (
+	mixedTxnTotal          int     // --total: total number of txn requests
+	mixedTxnRate           int     // --rate: maximum txns per second (0 is no limit)
+	mixedTxnReadWriteRatio float64 // --rw-ratio: read/write ops ratio
+	mixedTxnRangeLimit     int64   // --limit: read operation range result limit
+	mixedTxnEndKey         string  // --end-key: read operation range end key
+
+	writeOpsTotal uint64 // number of put requests generated by mixedTxnFunc
+	readOpsTotal  uint64 // number of range requests generated by mixedTxnFunc
+)
+
+func init() {
+	RootCmd.AddCommand(mixedTxnCmd)
+	mixedTxnCmd.Flags().IntVar(&keySize, "key-size", 8, "Key size of mixed txn")
+	mixedTxnCmd.Flags().IntVar(&valSize, "val-size", 8, "Value size of mixed txn")
+	mixedTxnCmd.Flags().IntVar(&mixedTxnRate, "rate", 0, "Maximum txns per second (0 is no limit)")
+	mixedTxnCmd.Flags().IntVar(&mixedTxnTotal, "total", 10000, "Total number of txn requests")
+	mixedTxnCmd.Flags().StringVar(&mixedTxnEndKey, "end-key", "",
+		"Read operation range end key. By default, we do full range query with the default limit of 1000.")
+	mixedTxnCmd.Flags().Int64Var(&mixedTxnRangeLimit, "limit", 1000, "Read operation range result limit")
+	mixedTxnCmd.Flags().IntVar(&keySpaceSize, "key-space-size", 1, "Maximum possible keys")
+	mixedTxnCmd.Flags().StringVar(&rangeConsistency, "consistency", "l", "Linearizable(l) or Serializable(s)")
+	mixedTxnCmd.Flags().Float64Var(&mixedTxnReadWriteRatio, "rw-ratio", 1, "Read/write ops ratio") // a request is a read with probability ratio/(1+ratio)
+}
+
+type request struct {
+	isWrite bool  // true for a put op, false for a range op
+	op      v3.Op // operation committed in the txn's Then branch
+}
+
+// mixedTxnFunc runs the txn-mixed benchmark: a producer emits mixedTxnTotal random
+// range/put txns, client workers execute them, and reads/writes are reported separately.
+func mixedTxnFunc(cmd *cobra.Command, args []string) {
+	if keySpaceSize <= 0 {
+		fmt.Fprintf(os.Stderr, "expected positive --key-space-size, got (%v)\n", keySpaceSize)
+		os.Exit(1)
+	}
+
+	if rangeConsistency == "l" {
+		fmt.Println("bench with linearizable range")
+	} else if rangeConsistency == "s" {
+		fmt.Println("bench with serializable range")
+	} else {
+		fmt.Fprintf(os.Stderr, "invalid --consistency %q\n%s", rangeConsistency, cmd.UsageString())
+		os.Exit(1)
+	}
+
+	requests := make(chan request, totalClients)
+	if mixedTxnRate == 0 {
+		mixedTxnRate = math.MaxInt32 // --rate 0 means "no limit"
+	}
+	limit := rate.NewLimiter(rate.Limit(mixedTxnRate), 1)
+	clients := mustCreateClients(totalClients, totalConns)
+	k, v := make([]byte, keySize), string(mustRandBytes(valSize))
+
+	bar = pb.New(mixedTxnTotal)
+	bar.Format("Bom !")
+	bar.Start()
+
+	reportRead, reportWrite := newReport(), newReport()
+	for i := range clients {
+		wg.Add(1)
+		go func(c *v3.Client) {
+			defer wg.Done()
+			for req := range requests {
+				limit.Wait(context.Background())
+				st := time.Now()
+				_, err := c.Txn(context.TODO()).Then(req.op).Commit()
+				if req.isWrite {
+					reportWrite.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
+				} else {
+					reportRead.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
+				}
+				bar.Increment()
+			}
+		}(clients[i])
+	}
+
+	go func() {
+		for i := 0; i < mixedTxnTotal; i++ {
+			var req request
+			if rand.Float64() < mixedTxnReadWriteRatio/(1+mixedTxnReadWriteRatio) {
+				opts := []v3.OpOption{v3.WithRange(mixedTxnEndKey)}
+				if rangeConsistency == "s" {
+					opts = append(opts, v3.WithSerializable())
+				}
+				opts = append(opts, v3.WithPrefix(), v3.WithLimit(mixedTxnRangeLimit)) // NOTE(review): WithPrefix follows WithRange; confirm --end-key still takes effect
+				req.op = v3.OpGet("", opts...)
+				req.isWrite = false
+				readOpsTotal++
+			} else {
+				binary.PutVarint(k, int64(i%keySpaceSize)) // panics if keySize is too small for the varint
+				req.op = v3.OpPut(string(k), v)
+				req.isWrite = true
+				writeOpsTotal++
+			}
+			requests <- req
+		}
+		close(requests) // lets the workers' range loops terminate
+	}()
+
+	rcRead, rcWrite := reportRead.Run(), reportWrite.Run()
+	wg.Wait()
+	close(reportRead.Results())
+	close(reportWrite.Results())
+	bar.Finish()
+	fmt.Printf("Total Read Ops: %d\nDetails:", readOpsTotal)
+	fmt.Println(<-rcRead)
+	fmt.Printf("Total Write Ops: %d\nDetails:", writeOpsTotal)
+	fmt.Println(<-rcWrite)
+}
diff --git a/tools/rw-heatmaps/README.md b/tools/rw-heatmaps/README.md
new file mode 100644
index 000000000..f6cd0e65f
--- /dev/null
+++ b/tools/rw-heatmaps/README.md
@@ -0,0 +1,26 @@
+# etcd/tools/rw-heatmaps
+
+`etcd/tools/rw-heatmaps` is the mixed read/write performance evaluation tool for etcd clusters.
+
+## Execute
+
+### Benchmark
+To get a mixed read/write performance evaluation result:
+```sh
+# run with default configurations and specify the working directory
+./rw-benchmark.sh -w ${WORKING_DIR}
+```
+`rw-benchmark.sh` will automatically use the etcd binary compiled under the `etcd/bin/` directory and the benchmark binary at `etcd/tools/benchmark/benchmark`.
+
+Note: the result csv file is saved to the directory the script is started from. `WORKING_DIR` is only where the etcd database is stored; it exists for scenarios where a different mounted disk is preferred.
+
+### Plot Graph
+To generate an image based on the benchmark result csv file:
+```sh
+# to generate an image from one data csv file
+./plot_data.py ${FIRST_CSV_FILE} -t ${IMAGE_TITLE} -o ${OUTPUT_IMAGE_NAME}
+
+
+# to generate an image comparing two data csv files
+./plot_data.py ${FIRST_CSV_FILE} ${SECOND_CSV_FILE} -t ${IMAGE_TITLE} -o ${OUTPUT_IMAGE_NAME}
+```
diff --git a/tools/rw-heatmaps/plot_data.py b/tools/rw-heatmaps/plot_data.py
new file mode 100755
index 000000000..c639c1ab2
--- /dev/null
+++ b/tools/rw-heatmaps/plot_data.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+import six
+import sys
+import os
+import argparse
+import logging
+import pandas as pd
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+
+
+logging.basicConfig(format='[%(levelname)s %(asctime)s %(name)s] %(message)s')
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+params = None
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description='plot graph using mixed read/write result file.')
+    parser.add_argument('input_file_a', type=str,
+                        help='first input data files in csv format. (required)')
+    parser.add_argument('input_file_b', type=str, nargs='?',  # optional: None when omitted
+                        help='second input data files in csv format. (optional)')
+    parser.add_argument('-t', '--title', dest='title', type=str, required=True,
+                        help='plot graph title string')
+    parser.add_argument('-o', '--output-image', dest='output', type=str, required=True,
+                        help='output image filename')
+    return parser.parse_args()
+
+
+def load_data_files(*args):
+    df_list = []
+    try:
+        for i in args:
+            if i is not None:
+                logger.debug('loading csv file {}'.format(i))
+                df_list.append(pd.read_csv(i))
+    except FileNotFoundError as e:
+        logger.error(str(e))
+        sys.exit(1)
+    res = []
+    try:
+        for df in df_list:
+            new_df = df[['ratio', 'conn_size', 'value_size']].copy()
+            tmp = [df[x].str.split(':') for x in ['1', '2', '3', '4', '5']]
+
+            read_df = [x.apply(lambda x: float(x[0])) for x in tmp]
+            read_avg = sum(read_df)/len(read_df)
+            new_df['read'] = read_avg
+
+            write_df = [x.apply(lambda x: float(x[1])) for x in tmp]
+            write_avg = sum(write_df)/len(write_df)
+            new_df['write'] = write_avg
+
+            new_df['ratio'] = new_df['ratio'].astype(float)
+            new_df['conn_size'] = new_df['conn_size'].astype(int)
+            new_df['value_size'] = new_df['value_size'].astype(int)
+            res.append(new_df)
+    except Exception as e:
+        logger.error(str(e))
+        sys.exit(1)
+    return res
+
+
+def plot_data(title, *args):  # one DataFrame: single grid; two: columns for a, b and their delta
+    if len(args) == 1:
+        figsize = (12, 16)
+        df0 = args[0]
+        fig = plt.figure(figsize=figsize)
+        count = 0
+        for val, df in df0.groupby('ratio'):
+            count += 1
+            plt.subplot(4, 2, count)  # 4x2 grid: room for eight ratio groups
+            plt.tripcolor(df['conn_size'], df['value_size'], df['read'] + df['write'])
+            plt.title('R/W Ratio {:.2f}'.format(val))
+            plt.yscale('log', base=2)
+            plt.ylabel('Value Size')
+            plt.xscale('log', base=2)
+            plt.xlabel('Connections Amount')
+            plt.colorbar()
+            plt.tight_layout()
+    elif len(args) == 2:
+        figsize = (12, 26)
+        df0 = args[0]
+        df1 = args[1]
+        fig = plt.figure(figsize=figsize)
+        count = 0
+        delta_df = df1.copy()  # read/write are overwritten below with the relative change of b against a
+        delta_df[['read', 'write']] = (df1[['read', 'write']] - df0[['read', 'write']])/df0[['read', 'write']]
+        for tmp in [df0, df1, delta_df]:
+            count += 1
+            count2 = count  # subplot index: starts in this dataset's column
+            for val, df in tmp.groupby('ratio'):
+                plt.subplot(8, 3, count2)  # 8 rows (ratio groups) x 3 columns (a, b, delta)
+                if count2 % 3 == 0:  # third column holds the delta plots
+                    cmap_name = 'bwr'
+                else:
+                    cmap_name = 'viridis'
+                plt.tripcolor(df['conn_size'], df['value_size'], df['read'] + df['write'], cmap=plt.get_cmap(cmap_name))
+                if count2 == 1:
+                    plt.title('{}\nR/W Ratio {:.2f}'.format(os.path.basename(params.input_file_a), val))
+                elif count2 == 2:
+                    plt.title('{}\nR/W Ratio {:.2f}'.format(os.path.basename(params.input_file_b), val))
+                elif count2 == 3:
+                    plt.title('Delta\nR/W Ratio {:.2f}'.format(val))
+                else:
+                    plt.title('R/W Ratio {:.2f}'.format(val))
+                plt.yscale('log', base=2)
+                plt.ylabel('Value Size')
+                plt.xscale('log', base=2)
+                plt.xlabel('Connections Amount')
+                plt.colorbar()
+                plt.tight_layout()
+                count2 += 3  # same column, next row
+    else:
+        raise Exception('invalid plot input data')
+    fig.suptitle(title)
+    fig.subplots_adjust(top=0.95)
+    plt.savefig(params.output)  # output path is read from the module-level params
+
+
+def plot_data_3d(df, title):  # not called by main() in this script
+    fig = plt.figure(figsize=(10, 10))
+    ax = fig.add_subplot(projection='3d')
+    ax.scatter(df['conn_size'], df['value_size'], 1/(1+1/df['ratio']), c=df['read'] + df['write'])  # z = ratio/(1+ratio); colour = read + write
+    ax.set_title('{}'.format(title))
+    ax.set_zlabel('R/W Ratio')
+    ax.set_ylabel('Value Size')
+    ax.set_xlabel('Connections Amount')
+    plt.show()  # shown on screen rather than saved to a file
+
+
+def main() -> None:
+    global params
+    logging.basicConfig()  # logging is already configured at import time above
+    params = parse_args()  # kept global: plot_data reads params for titles and the output path
+    result = load_data_files(params.input_file_a, params.input_file_b)
+    plot_data(params.title, *result)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/rw-heatmaps/rw-benchmark.sh b/tools/rw-heatmaps/rw-benchmark.sh
new file mode 100755
index 000000000..8c90f45c3
--- /dev/null
+++ b/tools/rw-heatmaps/rw-benchmark.sh
@@ -0,0 +1,195 @@
+#!/bin/bash
+
+#set -x
+
+RATIO_LIST="1/128 1/8 1/4 1/2 2/1 4/1 8/1 128/1" # read/write ratios, evaluated with bc
+VALUE_SIZE_POWER_RANGE="4 10" # value sizes are 2^4 .. 2^10
+CONN_CLI_COUNT_POWER_RANGE="5 12" # connection/client counts are 2^5 .. 2^12
+REPEAT_COUNT=5 # benchmark runs per setting
+RUN_COUNT=200000 # --total of each txn-mixed run (override with -c)
+
+KEY_SIZE=256 # --key-size used when pre-populating the database
+KEY_SPACE_SIZE=$((1024 * 64)) # number of keys pre-populated
+BACKEND_SIZE="$((20 * 1024 * 1024 * 1024))" # passed as --quota-backend-bytes
+RANGE_RESULT_LIMIT=100 # --limit of range reads (override with -l)
+CLIENT_PORT="23790" # etcd client port (override with -p)
+
+ETCD_ROOT_DIR="$(cd $(dirname $0) && pwd)/../.." # two directories above this script's directory
+ETCD_BIN_DIR="${ETCD_ROOT_DIR}/bin"
+ETCD_BIN="${ETCD_BIN_DIR}/etcd"
+ETCD_BM_BIN="${ETCD_ROOT_DIR}/tools/benchmark/benchmark"
+
+WORKING_DIR="$(mktemp -d)" # default working directory (override with -w)
+CURRENT_DIR="$(pwd -P)" # directory the script was started from
+OUTPUT_FILE="${CURRENT_DIR}/result-$(date '+%Y%m%d%H%M').csv" # result csv, named with minute resolution
+
+trap ctrl_c INT
+
+CURRENT_ETCD_PID=
+
+function ctrl_c() {
+  # SIGINT handler: announce the shutdown, then let quit() stop the etcd
+  # server (only when one has been started) and leave with status 0
+  echo "terminating..."
+  quit 0
+}
+
+function quit() {
+  # stop the etcd server, if one has been started, then leave the script
+  # with the exit status given as the first argument
+  [ -n "${CURRENT_ETCD_PID}" ] && kill_etcd_server ${CURRENT_ETCD_PID}
+  exit $1
+}
+
+function check_prerequisite() {
+  # refuse to overwrite an earlier result file
+  if [ -f "${OUTPUT_FILE}" ]; then
+    echo "file ${OUTPUT_FILE} already exists."
+    exit 1
+  fi
+  # write the csv header; no blanks after the commas, because plot_data.py
+  # looks the columns up by their exact names ('conn_size', '1', ...)
+  echo "ratio,conn_size,value_size,1,2,3,4,5" >"${OUTPUT_FILE}"
+}
+
+function run_etcd_server() {
+  if [ ! -x "${ETCD_BIN}" ]; then
+    echo "no etcd binary found at: ${ETCD_BIN}"
+    exit 1
+  fi
+  # start from a clean slate: delete data directories left in the current directory
+  [ -d "db" ] && rm -rf db
+  [ -d "default.etcd" ] && rm -rf default.etcd/
+  echo "start etcd server in the background"
+  ${ETCD_BIN} --quota-backend-bytes=${BACKEND_SIZE} \
+    --log-level 'error' \
+    --listen-client-urls http://0.0.0.0:${CLIENT_PORT} \
+    --advertise-client-urls http://127.0.0.1:${CLIENT_PORT} \
+    &>/dev/null &
+  CURRENT_ETCD_PID=$! # a PID does not fit in the 8-bit return status; callers may still read $!
+}
+
+function init_etcd_db() {
+  # pre-populate the database: put KEY_SPACE_SIZE sequential keys with VALUE_SIZE-sized values
+  if [ ! -x ${ETCD_BM_BIN} ]; then
+    echo "no etcd benchmark binary found at: ${ETCD_BM_BIN}"
+    quit -1
+  fi
+  echo "initialize etcd database..."
+  ${ETCD_BM_BIN} put --sequential-keys \
+    --key-space-size=${KEY_SPACE_SIZE} \
+    --val-size=${VALUE_SIZE} --key-size=${KEY_SIZE} \
+    --endpoints http://127.0.0.1:${CLIENT_PORT} \
+    --total=${KEY_SPACE_SIZE} \
+    &>/dev/null
+}
+
+function kill_etcd_server() {
+  # kill the etcd server with PID $1 and wait for it; "kill -0" probes that exact process
+  ETCD_PID=$1
+  if [ -z "${ETCD_PID}" ] || ! kill -0 "${ETCD_PID}" 2>/dev/null; then
+    echo "failed to find the etcd instance to kill: ${ETCD_PID}"
+    return
+  fi
+  echo "kill etcd server instance"
+  kill -9 ${ETCD_PID}
+  wait ${ETCD_PID} 2>/dev/null
+  sleep 5
+}
+
+while getopts ":w:c:p:l:vh" OPTION; do
+  case $OPTION in
+  h)
+    echo "usage: $(basename $0) [-h] [-w WORKING_DIR] [-c RUN_COUNT] [-p PORT] [-l RANGE_QUERY_LIMIT] [-v]" >&2
+    exit 1
+    ;;
+  w)
+    WORKING_DIR="${OPTARG}"
+    ;;
+  c)
+    RUN_COUNT="${OPTARG}"
+    ;;
+  p)
+    CLIENT_PORT="${OPTARG}"
+    ;;
+  v)
+    set -x
+    ;;
+  l)
+    RANGE_RESULT_LIMIT="${OPTARG}"
+    ;;
+  \?)
+    echo "usage: $(basename $0) [-h] [-w WORKING_DIR] [-c RUN_COUNT] [-p PORT] [-l RANGE_QUERY_LIMIT] [-v]" >&2
+    exit 1
+    ;;
+  esac
+done
+shift "$((${OPTIND} - 1))"
+
+# create the result file only now, so that -h or a bad option leaves nothing behind
+check_prerequisite
+pushd "${WORKING_DIR}" > /dev/null || exit 1 # data dirs are wiped relative to the cwd: never run elsewhere
+
+# progress stats management
+ITER_TOTAL=$(($(echo ${RATIO_LIST} | wc | awk "{print \$2}") * \
+  $(seq ${VALUE_SIZE_POWER_RANGE} | wc | awk "{print \$2}") * \
+  $(seq ${CONN_CLI_COUNT_POWER_RANGE} | wc | awk "{print \$2}")))
+ITER_CURRENT=0
+PERCENTAGE_LAST_PRINT=0
+PERCENTAGE_PRINT_THRESHOLD=5
+
+for RATIO_STR in ${RATIO_LIST}; do
+  RATIO=$(echo "scale=4; ${RATIO_STR}" | bc -l) # scale=2 would truncate 1/128 to .00
+  for VALUE_SIZE_POWER in $(seq ${VALUE_SIZE_POWER_RANGE}); do
+    VALUE_SIZE=$((2 ** ${VALUE_SIZE_POWER}))
+    for CONN_CLI_COUNT_POWER in $(seq ${CONN_CLI_COUNT_POWER_RANGE}); do
+
+      # progress stats management
+      ITER_CURRENT=$((${ITER_CURRENT} + 1))
+      PERCENTAGE_CURRENT=$(echo "scale=3; ${ITER_CURRENT}/${ITER_TOTAL}*100" | bc -l)
+      if [ "$(echo "${PERCENTAGE_CURRENT} - ${PERCENTAGE_LAST_PRINT} > ${PERCENTAGE_PRINT_THRESHOLD}" |
+        bc -l)" -eq 1 ]; then
+        PERCENTAGE_LAST_PRINT=${PERCENTAGE_CURRENT}
+        echo "${PERCENTAGE_CURRENT}% completed"
+      fi
+
+      CONN_CLI_COUNT=$((2 ** ${CONN_CLI_COUNT_POWER}))
+
+      run_etcd_server
+      CURRENT_ETCD_PID=$!
+      sleep 5
+
+      init_etcd_db
+
+      START=$(date +%s)
+      LINE="${RATIO},${CONN_CLI_COUNT},${VALUE_SIZE}"
+      echo -n "run with setting [${LINE}]"
+      for i in $(seq ${REPEAT_COUNT}); do
+        echo -n "."
+        # write values of the size under test, matching the pre-populated data
+        QPS=$(${ETCD_BM_BIN} txn-mixed "" \
+          --conns=${CONN_CLI_COUNT} --clients=${CONN_CLI_COUNT} \
+          --total=${RUN_COUNT} \
+          --endpoints "http://127.0.0.1:${CLIENT_PORT}" \
+          --rw-ratio ${RATIO} --limit ${RANGE_RESULT_LIMIT} --val-size ${VALUE_SIZE} \
+          2>/dev/null | grep "Requests/sec" | awk "{print \$2}")
+        RD_QPS=$(echo -e "${QPS}" | sed -n '1 p')
+        WR_QPS=$(echo -e "${QPS}" | sed -n '2 p')
+        if [ -z "${RD_QPS}" ] || [ -z "${WR_QPS}" ]; then # $? would only carry awk's status
+          echo "benchmark command failed: missing read or write Requests/sec"
+          quit -1
+        fi
+        LINE="${LINE},${RD_QPS}:${WR_QPS}"
+      done
+      DIFF=$(($(date +%s) - ${START}))
+      echo "took ${DIFF} seconds"
+
+      cat >>"${OUTPUT_FILE}" <<EOF
+${LINE}
+EOF
+      kill_etcd_server ${CURRENT_ETCD_PID}
+    done
+  done
+done
+
+popd > /dev/null