tools: add mixed read-write performance evaluation scripts

Wilson Wang 2021-05-22 13:56:02 -07:00
parent 6c72c1b09c
commit 79b2777482
4 changed files with 516 additions and 0 deletions


@@ -0,0 +1,152 @@
// Copyright 2021 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"context"
"encoding/binary"
"fmt"
"math"
"math/rand"
"os"
"time"
v3 "go.etcd.io/etcd/client/v3"
"go.etcd.io/etcd/pkg/v3/report"
"github.com/spf13/cobra"
"golang.org/x/time/rate"
"gopkg.in/cheggaaa/pb.v1"
)
// mixedTxnCmd represents the txn-mixed command
var mixedTxnCmd = &cobra.Command{
Use: "txn-mixed key [end-range]",
Short: "Benchmark a mixed load of txn-put & txn-range.",
Run: mixedTxnFunc,
}
var (
mixedTxnTotal int
mixedTxnRate int
mixedTxnReadWriteRatio float64
mixedTxnRangeLimit int64
mixedTxnEndKey string
writeOpsTotal uint64
readOpsTotal uint64
)
func init() {
RootCmd.AddCommand(mixedTxnCmd)
mixedTxnCmd.Flags().IntVar(&keySize, "key-size", 8, "Key size of mixed txn")
mixedTxnCmd.Flags().IntVar(&valSize, "val-size", 8, "Value size of mixed txn")
mixedTxnCmd.Flags().IntVar(&mixedTxnRate, "rate", 0, "Maximum txns per second (0 is no limit)")
mixedTxnCmd.Flags().IntVar(&mixedTxnTotal, "total", 10000, "Total number of txn requests")
mixedTxnCmd.Flags().StringVar(&mixedTxnEndKey, "end-key", "",
"Read operation range end key. By default, we do full range query with the default limit of 1000.")
mixedTxnCmd.Flags().Int64Var(&mixedTxnRangeLimit, "limit", 1000, "Read operation range result limit")
mixedTxnCmd.Flags().IntVar(&keySpaceSize, "key-space-size", 1, "Maximum possible keys")
mixedTxnCmd.Flags().StringVar(&rangeConsistency, "consistency", "l", "Linearizable(l) or Serializable(s)")
mixedTxnCmd.Flags().Float64Var(&mixedTxnReadWriteRatio, "rw-ratio", 1, "Read/write ops ratio")
}
type request struct {
isWrite bool
op v3.Op
}
func mixedTxnFunc(cmd *cobra.Command, args []string) {
if keySpaceSize <= 0 {
fmt.Fprintf(os.Stderr, "expected positive --key-space-size, got (%v)", keySpaceSize)
os.Exit(1)
}
if rangeConsistency == "l" {
fmt.Println("bench with linearizable range")
} else if rangeConsistency == "s" {
fmt.Println("bench with serializable range")
} else {
fmt.Fprintln(os.Stderr, cmd.Usage())
os.Exit(1)
}
requests := make(chan request, totalClients)
if mixedTxnRate == 0 {
mixedTxnRate = math.MaxInt32
}
limit := rate.NewLimiter(rate.Limit(mixedTxnRate), 1)
clients := mustCreateClients(totalClients, totalConns)
k, v := make([]byte, keySize), string(mustRandBytes(valSize))
bar = pb.New(mixedTxnTotal)
bar.Format("Bom !")
bar.Start()
reportRead := newReport()
reportWrite := newReport()
for i := range clients {
wg.Add(1)
go func(c *v3.Client) {
defer wg.Done()
for req := range requests {
limit.Wait(context.Background())
st := time.Now()
_, err := c.Txn(context.TODO()).Then(req.op).Commit()
if req.isWrite {
reportWrite.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
} else {
reportRead.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
}
bar.Increment()
}
}(clients[i])
}
go func() {
for i := 0; i < mixedTxnTotal; i++ {
var req request
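			// Pick a range read with probability ratio/(1+ratio) and a put otherwise,
			// so that the observed reads:writes converges to the requested --rw-ratio.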
if rand.Float64() < mixedTxnReadWriteRatio/(1+mixedTxnReadWriteRatio) {
opts := []v3.OpOption{v3.WithRange(mixedTxnEndKey)}
if rangeConsistency == "s" {
opts = append(opts, v3.WithSerializable())
}
opts = append(opts, v3.WithPrefix(), v3.WithLimit(mixedTxnRangeLimit))
req.op = v3.OpGet("", opts...)
req.isWrite = false
readOpsTotal++
} else {
binary.PutVarint(k, int64(i%keySpaceSize))
req.op = v3.OpPut(string(k), v)
req.isWrite = true
writeOpsTotal++
}
requests <- req
}
close(requests)
}()
rcRead := reportRead.Run()
rcWrite := reportWrite.Run()
wg.Wait()
close(reportRead.Results())
close(reportWrite.Results())
bar.Finish()
fmt.Printf("Total Read Ops: %d\nDetails:", readOpsTotal)
fmt.Println(<-rcRead)
fmt.Printf("Total Write Ops: %d\nDetails:", writeOpsTotal)
fmt.Println(<-rcWrite)
}


@@ -0,0 +1,26 @@
# etcd/tools/rw-heatmaps
`etcd/tools/rw-heatmaps` is the mixed read/write performance evaluation tool for etcd clusters.
## Execute
### Benchmark
To get a mixed read/write performance evaluation result:
```sh
# run with default configurations and specify the working directory
./rw-benchmark.sh -w ${WORKING_DIR}
```
`rw-benchmark.sh` automatically uses the etcd binary compiled under the `etcd/bin/` directory and the benchmark binary compiled under `etcd/tools/benchmark/`.
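Each measurement is a single invocation of the new `benchmark txn-mixed` subcommand. As a rough sketch of what the script runs for one parameter combination (the endpoint, counts, and flag values below are only example defaults taken from the script, not requirements):
```sh
# illustrative single run; rw-benchmark.sh sweeps ratio, connections and value size automatically
./tools/benchmark/benchmark txn-mixed "" \
  --conns=64 --clients=64 \
  --total=200000 \
  --endpoints "http://127.0.0.1:23790" \
  --rw-ratio 4 --limit 100
```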
Note: the result CSV file is saved to the directory from which the script is invoked, while the etcd database is stored in the working directory specified with `-w`. Using a separate working directory is intended for scenarios where the database should live on a different mounted disk.
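The CSV contains one row per combination of R/W ratio, connection count, and value size; the numbered columns each hold one repeat's result as a `read_qps:write_qps` pair. A hypothetical excerpt (values made up) looks like:
```
ratio,conn_size,value_size,1,2,3,4,5
4.00,64,1024,35210.1:8721.4,34980.7:8650.2,35555.3:8803.9,35102.8:8699.5,35411.6:8752.0
```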
### Plot Graph
To generate an image based on the benchmark result CSV file:
```sh
# to generate an image from one data CSV file
./plot_data.py ${FIRST_CSV_FILE} -t ${IMAGE_TITLE} -o ${OUTPUT_IMAGE_NAME}
# to generate an image comparing two data CSV files
./plot_data.py ${FIRST_CSV_FILE} ${SECOND_CSV_FILE} -t ${IMAGE_TITLE} -o ${OUTPUT_IMAGE_NAME}
```
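For example, to compare the CSVs from two `rw-benchmark.sh` runs (the script names its output `result-<timestamp>.csv` in the invocation directory), an invocation along these lines can be used; the file names and title are placeholders:
```sh
# hypothetical comparison of a baseline run against a patched run
./plot_data.py result-202105221200.csv result-202105221830.csv \
  -t "baseline vs patched" \
  -o rw-heatmap-compare.png
```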

tools/rw-heatmaps/plot_data.py (executable file, 143 lines)

@@ -0,0 +1,143 @@
#!/usr/bin/env python3
import sys
import os
import argparse
import logging
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
logging.basicConfig(format='[%(levelname)s %(asctime)s %(name)s] %(message)s')
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
params = None
def parse_args():
parser = argparse.ArgumentParser(description='plot graph using mixed read/write result file.')
parser.add_argument('input_file_a', type=str,
help='first input data files in csv format. (required)')
parser.add_argument('input_file_b', type=str, nargs='?',
help='second input data files in csv format. (optional)')
parser.add_argument('-t', '--title', dest='title', type=str, required=True,
help='plot graph title string')
parser.add_argument('-o', '--output-image', dest='output', type=str, required=True,
help='output image filename')
return parser.parse_args()
def load_data_files(*args):
df_list = []
try:
for i in args:
if i is not None:
logger.debug('loading csv file {}'.format(i))
df_list.append(pd.read_csv(i))
except FileNotFoundError as e:
logger.error(str(e))
sys.exit(1)
res = []
try:
for df in df_list:
new_df = df[['ratio', 'conn_size', 'value_size']].copy()
            # columns '1'..'5' each hold one repeat's result as a "read_qps:write_qps" string
            tmp = [df[x].str.split(':') for x in ['1', '2', '3', '4', '5']]
            read_df = [col.apply(lambda pair: float(pair[0])) for col in tmp]
            read_avg = sum(read_df) / len(read_df)
            new_df['read'] = read_avg
            write_df = [col.apply(lambda pair: float(pair[1])) for col in tmp]
            write_avg = sum(write_df) / len(write_df)
new_df['write'] = write_avg
new_df['ratio'] = new_df['ratio'].astype(float)
new_df['conn_size'] = new_df['conn_size'].astype(int)
new_df['value_size'] = new_df['value_size'].astype(int)
res.append(new_df)
except Exception as e:
logger.error(str(e))
sys.exit(1)
return res
def plot_data(title, *args):
if len(args) == 1:
figsize = (12, 16)
df0 = args[0]
fig = plt.figure(figsize=figsize)
count = 0
for val, df in df0.groupby('ratio'):
count += 1
plt.subplot(4, 2, count)
plt.tripcolor(df['conn_size'], df['value_size'], df['read'] + df['write'])
plt.title('R/W Ratio {:.2f}'.format(val))
plt.yscale('log', base=2)
plt.ylabel('Value Size')
plt.xscale('log', base=2)
plt.xlabel('Connections Amount')
plt.colorbar()
plt.tight_layout()
elif len(args) == 2:
figsize = (12, 26)
df0 = args[0]
df1 = args[1]
fig = plt.figure(figsize=figsize)
count = 0
delta_df = df1.copy()
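        # relative change of the second run versus the first: (b - a) / a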
delta_df[['read', 'write']] = (df1[['read', 'write']] - df0[['read', 'write']])/df0[['read', 'write']]
for tmp in [df0, df1, delta_df]:
count += 1
count2 = count
for val, df in tmp.groupby('ratio'):
plt.subplot(8, 3, count2)
if count2 % 3 == 0:
cmap_name = 'bwr'
else:
cmap_name = 'viridis'
plt.tripcolor(df['conn_size'], df['value_size'], df['read'] + df['write'], cmap=plt.get_cmap(cmap_name))
if count2 == 1:
plt.title('{}\nR/W Ratio {:.2f}'.format(os.path.basename(params.input_file_a), val))
elif count2 == 2:
plt.title('{}\nR/W Ratio {:.2f}'.format(os.path.basename(params.input_file_b), val))
elif count2 == 3:
plt.title('Delta\nR/W Ratio {:.2f}'.format(val))
else:
plt.title('R/W Ratio {:.2f}'.format(val))
plt.yscale('log', base=2)
plt.ylabel('Value Size')
plt.xscale('log', base=2)
plt.xlabel('Connections Amount')
plt.colorbar()
plt.tight_layout()
count2 += 3
else:
raise Exception('invalid plot input data')
fig.suptitle(title)
fig.subplots_adjust(top=0.95)
plt.savefig(params.output)
def plot_data_3d(df, title):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(projection='3d')
ax.scatter(df['conn_size'], df['value_size'], 1/(1+1/df['ratio']), c=df['read'] + df['write'])
ax.set_title('{}'.format(title))
ax.set_zlabel('R/W Ratio')
ax.set_ylabel('Value Size')
ax.set_xlabel('Connections Amount')
plt.show()
def main():
global params
logging.basicConfig()
params = parse_args()
result = load_data_files(params.input_file_a, params.input_file_b)
plot_data(params.title, *result)
if __name__ == '__main__':
main()

tools/rw-heatmaps/rw-benchmark.sh (executable file, 195 lines)

@@ -0,0 +1,195 @@
#!/bin/bash
#set -x
RATIO_LIST="1/128 1/8 1/4 1/2 2/1 4/1 8/1 128/1"
VALUE_SIZE_POWER_RANGE="4 10"
CONN_CLI_COUNT_POWER_RANGE="5 12"
REPEAT_COUNT=5
RUN_COUNT=200000
KEY_SIZE=256
KEY_SPACE_SIZE=$((1024 * 64))
BACKEND_SIZE="$((20 * 1024 * 1024 * 1024))"
RANGE_RESULT_LIMIT=100
CLIENT_PORT="23790"
ETCD_ROOT_DIR="$(cd $(dirname $0) && pwd)/../.."
ETCD_BIN_DIR="${ETCD_ROOT_DIR}/bin"
ETCD_BIN="${ETCD_BIN_DIR}/etcd"
ETCD_BM_BIN="${ETCD_ROOT_DIR}/tools/benchmark/benchmark"
WORKING_DIR="$(mktemp -d)"
CURRENT_DIR="$(pwd -P)"
OUTPUT_FILE="${CURRENT_DIR}/result-$(date '+%Y%m%d%H%M').csv"
trap ctrl_c INT
CURRENT_ETCD_PID=
function ctrl_c() {
# capture ctrl-c and kill server
echo "terminating..."
kill_etcd_server ${CURRENT_ETCD_PID}
exit 0
}
function quit() {
if [ ! -z ${CURRENT_ETCD_PID} ]; then
kill_etcd_server ${CURRENT_ETCD_PID}
fi
exit $1
}
function check_prerequisite() {
# check initial parameters
if [ -f "${OUTPUT_FILE}" ]; then
echo "file ${OUTPUT_FILE} already exists."
exit 1
fi
cat >"${OUTPUT_FILE}" <<EOF
ratio, conn_size, value_size, 1, 2, 3, 4, 5
EOF
}
function run_etcd_server() {
if [ ! -x ${ETCD_BIN} ]; then
echo "no etcd binary found at: ${ETCD_BIN}"
exit 1
fi
# delete existing data directories
[ -d "db" ] && rm -rf db
[ -d "default.etcd" ] && rm -rf default.etcd/
echo "start etcd server in the background"
${ETCD_BIN} --quota-backend-bytes=${BACKEND_SIZE} \
--log-level 'error' \
--listen-client-urls http://0.0.0.0:${CLIENT_PORT} \
--advertise-client-urls http://127.0.0.1:${CLIENT_PORT} \
&>/dev/null &
return $!
}
function init_etcd_db() {
#initialize etcd database
if [ ! -x ${ETCD_BM_BIN} ]; then
echo "no etcd benchmark binary found at: ${ETCD_BM_BIN}"
quit -1
fi
echo "initialize etcd database..."
${ETCD_BM_BIN} put --sequential-keys \
--key-space-size=${KEY_SPACE_SIZE} \
--val-size=${VALUE_SIZE} --key-size=${KEY_SIZE} \
--endpoints http://127.0.0.1:${CLIENT_PORT} \
--total=${KEY_SPACE_SIZE} \
&>/dev/null
}
function kill_etcd_server() {
# kill etcd server
ETCD_PID=$1
if [ -z "$(ps aux | grep etcd | awk "{print \$2}")" ]; then
echo "failed to find the etcd instance to kill: ${ETCD_PID}"
return
fi
echo "kill etcd server instance"
kill -9 ${ETCD_PID}
wait ${ETCD_PID} 2>/dev/null
sleep 5
}
check_prerequisite
while getopts ":w:c:p:l:vh" OPTION; do
case $OPTION in
h)
echo "usage: $(basename $0) [-h] [-w WORKING_DIR] [-c RUN_COUNT] [-p PORT] [-l RANGE_QUERY_LIMIT] [-v]" >&2
exit 1
;;
w)
WORKING_DIR="${OPTARG}"
;;
c)
RUN_COUNT="${OPTARG}"
;;
p)
CLIENT_PORT="${OPTARG}"
;;
v)
set -x
;;
l)
RANGE_RESULT_LIMIT="${OPTARG}"
;;
\?)
echo "usage: $(basename $0) [-h] [-w WORKING_DIR] [-c RUN_COUNT] [-p PORT] [-l RANGE_QUERY_LIMIT] [-v]" >&2
exit 1
;;
esac
done
shift "$((${OPTIND} - 1))"
pushd "${WORKING_DIR}" > /dev/null
# progress stats management
ITER_TOTAL=$(($(echo ${RATIO_LIST} | wc | awk "{print \$2}") * \
$(seq ${VALUE_SIZE_POWER_RANGE} | wc | awk "{print \$2}") * \
$(seq ${CONN_CLI_COUNT_POWER_RANGE} | wc | awk "{print \$2}")))
ITER_CURRENT=0
PERCENTAGE_LAST_PRINT=0
PERCENTAGE_PRINT_THRESHOLD=5
for RATIO_STR in ${RATIO_LIST}; do
RATIO=$(echo "scale=2; ${RATIO_STR}" | bc -l)
for VALUE_SIZE_POWER in $(seq ${VALUE_SIZE_POWER_RANGE}); do
VALUE_SIZE=$((2 ** ${VALUE_SIZE_POWER}))
for CONN_CLI_COUNT_POWER in $(seq ${CONN_CLI_COUNT_POWER_RANGE}); do
# progress stats management
ITER_CURRENT=$((${ITER_CURRENT} + 1))
PERCENTAGE_CURRENT=$(echo "scale=3; ${ITER_CURRENT}/${ITER_TOTAL}*100" | bc -l)
if [ "$(echo "${PERCENTAGE_CURRENT} - ${PERCENTAGE_LAST_PRINT} > ${PERCENTAGE_PRINT_THRESHOLD}" |
bc -l)" -eq 1 ]; then
PERCENTAGE_LAST_PRINT=${PERCENTAGE_CURRENT}
echo "${PERCENTAGE_CURRENT}% completed"
fi
CONN_CLI_COUNT=$((2 ** ${CONN_CLI_COUNT_POWER}))
run_etcd_server
CURRENT_ETCD_PID=$!
sleep 5
init_etcd_db
START=$(date +%s)
LINE="${RATIO},${CONN_CLI_COUNT},${VALUE_SIZE}"
echo -n "run with setting [${LINE}]"
for i in $(seq ${REPEAT_COUNT}); do
echo -n "."
QPS=$(${ETCD_BM_BIN} txn-mixed "" \
--conns=${CONN_CLI_COUNT} --clients=${CONN_CLI_COUNT} \
--total=${RUN_COUNT} \
--endpoints "http://127.0.0.1:${CLIENT_PORT}" \
--rw-ratio ${RATIO} --limit ${RANGE_RESULT_LIMIT} \
2>/dev/null | grep "Requests/sec" | awk "{print \$2}")
        if [ -z "${QPS}" ]; then
          # no Requests/sec lines were produced, treat the run as a failure
          echo "benchmark command failed"
          quit -1
        fi
RD_QPS=$(echo -e "${QPS}" | sed -n '1 p')
WR_QPS=$(echo -e "${QPS}" | sed -n '2 p')
LINE="${LINE},${RD_QPS}:${WR_QPS}"
done
END=$(date +%s)
DIFF=$((${END} - ${START}))
echo "took ${DIFF} seconds"
cat >>"${OUTPUT_FILE}" <<EOF
${LINE}
EOF
kill_etcd_server ${CURRENT_ETCD_PID}
done
done
done
popd > /dev/null