From 79b2777482e9b718a856aa734f9a00e7cdd3e9cc Mon Sep 17 00:00:00 2001 From: Wilson Wang Date: Sat, 22 May 2021 13:56:02 -0700 Subject: [PATCH] tools: add mixed read-write performance evaluation scripts --- tools/benchmark/cmd/txn_mixed.go | 152 +++++++++++++++++++++++ tools/rw-heatmaps/README.md | 26 ++++ tools/rw-heatmaps/plot_data.py | 143 ++++++++++++++++++++++ tools/rw-heatmaps/rw-benchmark.sh | 195 ++++++++++++++++++++++++++++++ 4 files changed, 516 insertions(+) create mode 100644 tools/benchmark/cmd/txn_mixed.go create mode 100644 tools/rw-heatmaps/README.md create mode 100755 tools/rw-heatmaps/plot_data.py create mode 100755 tools/rw-heatmaps/rw-benchmark.sh diff --git a/tools/benchmark/cmd/txn_mixed.go b/tools/benchmark/cmd/txn_mixed.go new file mode 100644 index 000000000..6ec1bb96d --- /dev/null +++ b/tools/benchmark/cmd/txn_mixed.go @@ -0,0 +1,152 @@ +// Copyright 2021 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cmd + +import ( + "context" + "encoding/binary" + "fmt" + "math" + "math/rand" + "os" + "time" + + v3 "go.etcd.io/etcd/client/v3" + "go.etcd.io/etcd/pkg/v3/report" + + "github.com/spf13/cobra" + "golang.org/x/time/rate" + "gopkg.in/cheggaaa/pb.v1" +) + +// mixeTxnCmd represents the mixedTxn command +var mixedTxnCmd = &cobra.Command{ + Use: "txn-mixed key [end-range]", + Short: "Benchmark a mixed load of txn-put & txn-range.", + + Run: mixedTxnFunc, +} + +var ( + mixedTxnTotal int + mixedTxnRate int + mixedTxnReadWriteRatio float64 + mixedTxnRangeLimit int64 + mixedTxnEndKey string + + writeOpsTotal uint64 + readOpsTotal uint64 +) + +func init() { + RootCmd.AddCommand(mixedTxnCmd) + mixedTxnCmd.Flags().IntVar(&keySize, "key-size", 8, "Key size of mixed txn") + mixedTxnCmd.Flags().IntVar(&valSize, "val-size", 8, "Value size of mixed txn") + mixedTxnCmd.Flags().IntVar(&mixedTxnRate, "rate", 0, "Maximum txns per second (0 is no limit)") + mixedTxnCmd.Flags().IntVar(&mixedTxnTotal, "total", 10000, "Total number of txn requests") + mixedTxnCmd.Flags().StringVar(&mixedTxnEndKey, "end-key", "", + "Read operation range end key. 
By default, we do full range query with the default limit of 1000.") + mixedTxnCmd.Flags().Int64Var(&mixedTxnRangeLimit, "limit", 1000, "Read operation range result limit") + mixedTxnCmd.Flags().IntVar(&keySpaceSize, "key-space-size", 1, "Maximum possible keys") + mixedTxnCmd.Flags().StringVar(&rangeConsistency, "consistency", "l", "Linearizable(l) or Serializable(s)") + mixedTxnCmd.Flags().Float64Var(&mixedTxnReadWriteRatio, "rw-ratio", 1, "Read/write ops ratio") +} + +type request struct { + isWrite bool + op v3.Op +} + +func mixedTxnFunc(cmd *cobra.Command, args []string) { + if keySpaceSize <= 0 { + fmt.Fprintf(os.Stderr, "expected positive --key-space-size, got (%v)", keySpaceSize) + os.Exit(1) + } + + if rangeConsistency == "l" { + fmt.Println("bench with linearizable range") + } else if rangeConsistency == "s" { + fmt.Println("bench with serializable range") + } else { + fmt.Fprintln(os.Stderr, cmd.Usage()) + os.Exit(1) + } + + requests := make(chan request, totalClients) + if mixedTxnRate == 0 { + mixedTxnRate = math.MaxInt32 + } + limit := rate.NewLimiter(rate.Limit(mixedTxnRate), 1) + clients := mustCreateClients(totalClients, totalConns) + k, v := make([]byte, keySize), string(mustRandBytes(valSize)) + + bar = pb.New(mixedTxnTotal) + bar.Format("Bom !") + bar.Start() + + reportRead := newReport() + reportWrite := newReport() + for i := range clients { + wg.Add(1) + go func(c *v3.Client) { + defer wg.Done() + for req := range requests { + limit.Wait(context.Background()) + st := time.Now() + _, err := c.Txn(context.TODO()).Then(req.op).Commit() + if req.isWrite { + reportWrite.Results() <- report.Result{Err: err, Start: st, End: time.Now()} + } else { + reportRead.Results() <- report.Result{Err: err, Start: st, End: time.Now()} + } + bar.Increment() + } + }(clients[i]) + } + + go func() { + for i := 0; i < mixedTxnTotal; i++ { + var req request + if rand.Float64() < mixedTxnReadWriteRatio/(1+mixedTxnReadWriteRatio) { + opts := 
[]v3.OpOption{v3.WithRange(mixedTxnEndKey)} + if rangeConsistency == "s" { + opts = append(opts, v3.WithSerializable()) + } + opts = append(opts, v3.WithPrefix(), v3.WithLimit(mixedTxnRangeLimit)) + req.op = v3.OpGet("", opts...) + req.isWrite = false + readOpsTotal++ + } else { + binary.PutVarint(k, int64(i%keySpaceSize)) + req.op = v3.OpPut(string(k), v) + req.isWrite = true + writeOpsTotal++ + } + requests <- req + } + close(requests) + }() + + rcRead := reportRead.Run() + rcWrite := reportWrite.Run() + wg.Wait() + close(reportRead.Results()) + close(reportWrite.Results()) + bar.Finish() + fmt.Printf("Total Read Ops: %d\nDetails:", readOpsTotal) + fmt.Println(<-rcRead) + fmt.Printf("Total Write Ops: %d\nDetails:", writeOpsTotal) + fmt.Println(<-rcWrite) +} diff --git a/tools/rw-heatmaps/README.md b/tools/rw-heatmaps/README.md new file mode 100644 index 000000000..f6cd0e65f --- /dev/null +++ b/tools/rw-heatmaps/README.md @@ -0,0 +1,26 @@ +# etcd/tools/rw-heatmaps + +`etcd/tools/rw-heatmaps` is the mixed read/write performance evaluation tool for etcd clusters. + +## Execute + +### Benchmark +To get a mixed read/write performance evaluation result: +```sh +# run with default configurations and specify the working directory +./rw-benchmark.sh -w ${WORKING_DIR} +``` +`rw-benchmark.sh` will automatically use the etcd binary compiled under `etcd/bin/` directory. + +Note: the result csv file will be saved to current working directory. The working directory is where etcd database is saved. The working directory is designed for scenarios where a different mounted disk is preferred. 
+ +### Plot Graph +To generate an image based on the benchmark result csv file: +```sh +# to generate an image from one data csv file +./plot_data.py ${FIRST_CSV_FILE} -t ${IMAGE_TITLE} -o ${OUTPUT_IMAGE_NAME} + + +# to generate an image comparing two data csv files +./plot_data.py ${FIRST_CSV_FILE} ${SECOND_CSV_FILE} -t ${IMAGE_TITLE} -o ${OUTPUT_IMAGE_NAME} +``` diff --git a/tools/rw-heatmaps/plot_data.py b/tools/rw-heatmaps/plot_data.py new file mode 100755 index 000000000..c639c1ab2 --- /dev/null +++ b/tools/rw-heatmaps/plot_data.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +import six +import sys +import os +import argparse +import logging +import pandas as pd +import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import Axes3D + + +logging.basicConfig(format='[%(levelname)s %(asctime)s %(name)s] %(message)s') +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +params = None + + +def parse_args(): + parser = argparse.ArgumentParser(description='plot graph using mixed read/write result file.') + parser.add_argument('input_file_a', type=str, + help='first input data files in csv format. (required)') + parser.add_argument('input_file_b', type=str, nargs='?', + help='second input data files in csv format. 
(optional)') + parser.add_argument('-t', '--title', dest='title', type=str, required=True, + help='plot graph title string') + parser.add_argument('-o', '--output-image', dest='output', type=str, required=True, + help='output image filename') + return parser.parse_args() + + +def load_data_files(*args): + df_list = [] + try: + for i in args: + if i is not None: + logger.debug('loading csv file {}'.format(i)) + df_list.append(pd.read_csv(i)) + except FileNotFoundError as e: + logger.error(str(e)) + sys.exit(1) + res = [] + try: + for df in df_list: + new_df = df[['ratio', 'conn_size', 'value_size']].copy() + tmp = [df[x].str.split(':') for x in ['1', '2', '3', '4', '5']] + + read_df = [x.apply(lambda x: float(x[0])) for x in tmp] + read_avg = sum(read_df)/len(read_df) + new_df['read'] = read_avg + + write_df = [x.apply(lambda x: float(x[1])) for x in tmp] + write_avg = sum(write_df)/len(write_df) + new_df['write'] = write_avg + + new_df['ratio'] = new_df['ratio'].astype(float) + new_df['conn_size'] = new_df['conn_size'].astype(int) + new_df['value_size'] = new_df['value_size'].astype(int) + res.append(new_df) + except Exception as e: + logger.error(str(e)) + sys.exit(1) + return res + + +def plot_data(title, *args): + if len(args) == 1: + figsize = (12, 16) + df0 = args[0] + fig = plt.figure(figsize=figsize) + count = 0 + for val, df in df0.groupby('ratio'): + count += 1 + plt.subplot(4, 2, count) + plt.tripcolor(df['conn_size'], df['value_size'], df['read'] + df['write']) + plt.title('R/W Ratio {:.2f}'.format(val)) + plt.yscale('log', base=2) + plt.ylabel('Value Size') + plt.xscale('log', base=2) + plt.xlabel('Connections Amount') + plt.colorbar() + plt.tight_layout() + elif len(args) == 2: + figsize = (12, 26) + df0 = args[0] + df1 = args[1] + fig = plt.figure(figsize=figsize) + count = 0 + delta_df = df1.copy() + delta_df[['read', 'write']] = (df1[['read', 'write']] - df0[['read', 'write']])/df0[['read', 'write']] + for tmp in [df0, df1, delta_df]: + count += 1 + 
count2 = count + for val, df in tmp.groupby('ratio'): + plt.subplot(8, 3, count2) + if count2 % 3 == 0: + cmap_name = 'bwr' + else: + cmap_name = 'viridis' + plt.tripcolor(df['conn_size'], df['value_size'], df['read'] + df['write'], cmap=plt.get_cmap(cmap_name)) + if count2 == 1: + plt.title('{}\nR/W Ratio {:.2f}'.format(os.path.basename(params.input_file_a), val)) + elif count2 == 2: + plt.title('{}\nR/W Ratio {:.2f}'.format(os.path.basename(params.input_file_b), val)) + elif count2 == 3: + plt.title('Delta\nR/W Ratio {:.2f}'.format(val)) + else: + plt.title('R/W Ratio {:.2f}'.format(val)) + plt.yscale('log', base=2) + plt.ylabel('Value Size') + plt.xscale('log', base=2) + plt.xlabel('Connections Amount') + plt.colorbar() + plt.tight_layout() + count2 += 3 + else: + raise Exception('invalid plot input data') + fig.suptitle(title) + fig.subplots_adjust(top=0.95) + plt.savefig(params.output) + + +def plot_data_3d(df, title): + fig = plt.figure(figsize=(10, 10)) + ax = fig.add_subplot(projection='3d') + ax.scatter(df['conn_size'], df['value_size'], 1/(1+1/df['ratio']), c=df['read'] + df['write']) + ax.set_title('{}'.format(title)) + ax.set_zlabel('R/W Ratio') + ax.set_ylabel('Value Size') + ax.set_xlabel('Connections Amount') + plt.show() + + +def main(): + global params + logging.basicConfig() + params = parse_args() + result = load_data_files(params.input_file_a, params.input_file_b) + plot_data(params.title, *result) + + +if __name__ == '__main__': + main() diff --git a/tools/rw-heatmaps/rw-benchmark.sh b/tools/rw-heatmaps/rw-benchmark.sh new file mode 100755 index 000000000..8c90f45c3 --- /dev/null +++ b/tools/rw-heatmaps/rw-benchmark.sh @@ -0,0 +1,195 @@ +#!/bin/bash + +#set -x + +RATIO_LIST="1/128 1/8 1/4 1/2 2/1 4/1 8/1 128/1" +VALUE_SIZE_POWER_RANGE="4 10" +CONN_CLI_COUNT_POWER_RANGE="5 12" +REPEAT_COUNT=5 +RUN_COUNT=200000 + +KEY_SIZE=256 +KEY_SPACE_SIZE=$((1024 * 64)) +BACKEND_SIZE="$((20 * 1024 * 1024 * 1024))" +RANGE_RESULT_LIMIT=100 +CLIENT_PORT="23790" 
+ +ETCD_ROOT_DIR="$(cd $(dirname $0) && pwd)/../.." +ETCD_BIN_DIR="${ETCD_ROOT_DIR}/bin" +ETCD_BIN="${ETCD_BIN_DIR}/etcd" +ETCD_BM_BIN="${ETCD_ROOT_DIR}/tools/benchmark/benchmark" + +WORKING_DIR="$(mktemp -d)" +CURRENT_DIR="$(pwd -P)" +OUTPUT_FILE="${CURRENT_DIR}/result-$(date '+%Y%m%d%H%M').csv" + +trap ctrl_c INT + +CURRENT_ETCD_PID= + +function ctrl_c() { + # capture ctrl-c and kill server + echo "terminating..." + kill_etcd_server ${CURRENT_ETCD_PID} + exit 0 +} + +function quit() { + if [ ! -z ${CURRENT_ETCD_PID} ]; then + kill_etcd_server ${CURRENT_ETCD_PID} + fi + exit $1 +} + +function check_prerequisite() { + # check initial parameters + if [ -f "${OUTPUT_FILE}" ]; then + echo "file ${OUTPUT_FILE} already exists." + exit 1 + fi + cat >"${OUTPUT_FILE}" </dev/null & + return $! +} + +function init_etcd_db() { + #initialize etcd database + if [ ! -x ${ETCD_BM_BIN} ]; then + echo "no etcd benchmark binary found at: ${ETCD_BM_BIN}" + quit -1 + fi + echo "initialize etcd database..." + ${ETCD_BM_BIN} put --sequential-keys \ + --key-space-size=${KEY_SPACE_SIZE} \ + --val-size=${VALUE_SIZE} --key-size=${KEY_SIZE} \ + --endpoints http://127.0.0.1:${CLIENT_PORT} \ + --total=${KEY_SPACE_SIZE} \ + &>/dev/null +} + +function kill_etcd_server() { + # kill etcd server + ETCD_PID=$1 + if [ -z "$(ps aux | grep etcd | awk "{print \$2}")" ]; then + echo "failed to find the etcd instance to kill: ${ETCD_PID}" + return + fi + echo "kill etcd server instance" + kill -9 ${ETCD_PID} + wait ${ETCD_PID} 2>/dev/null + sleep 5 +} + +check_prerequisite + +while getopts ":w:c:p:l:vh" OPTION; do + case $OPTION in + h) + echo "usage: $(basename $0) [-h] [-w WORKING_DIR] [-c RUN_COUNT] [-p PORT] [-l RANGE_QUERY_LIMIT] [-v]" >&2 + exit 1 + ;; + w) + WORKING_DIR="${OPTARG}" + ;; + c) + RUN_COUNT="${OPTARG}" + ;; + p) + CLIENT_PORT="${OPTARG}" + ;; + v) + set -x + ;; + l) + RANGE_RESULT_LIMIT="${OPTARG}" + ;; + \?) 
+ echo "usage: $(basename $0) [-h] [-w WORKING_DIR] [-c RUN_COUNT] [-p PORT] [-l RANGE_QUERY_LIMIT] [-v]" >&2 + exit 1 + ;; + esac +done +shift "$((${OPTIND} - 1))" + +pushd "${WORKING_DIR}" > /dev/null + +# progress stats management +ITER_TOTAL=$(($(echo ${RATIO_LIST} | wc | awk "{print \$2}") * \ + $(seq ${VALUE_SIZE_POWER_RANGE} | wc | awk "{print \$2}") * \ + $(seq ${CONN_CLI_COUNT_POWER_RANGE} | wc | awk "{print \$2}"))) +ITER_CURRENT=0 +PERCENTAGE_LAST_PRINT=0 +PERCENTAGE_PRINT_THRESHOLD=5 + +for RATIO_STR in ${RATIO_LIST}; do + RATIO=$(echo "scale=2; ${RATIO_STR}" | bc -l) + for VALUE_SIZE_POWER in $(seq ${VALUE_SIZE_POWER_RANGE}); do + VALUE_SIZE=$((2 ** ${VALUE_SIZE_POWER})) + for CONN_CLI_COUNT_POWER in $(seq ${CONN_CLI_COUNT_POWER_RANGE}); do + + # progress stats management + ITER_CURRENT=$((${ITER_CURRENT} + 1)) + PERCENTAGE_CURRENT=$(echo "scale=3; ${ITER_CURRENT}/${ITER_TOTAL}*100" | bc -l) + if [ "$(echo "${PERCENTAGE_CURRENT} - ${PERCENTAGE_LAST_PRINT} > ${PERCENTAGE_PRINT_THRESHOLD}" | + bc -l)" -eq 1 ]; then + PERCENTAGE_LAST_PRINT=${PERCENTAGE_CURRENT} + echo "${PERCENTAGE_CURRENT}% completed" + fi + + CONN_CLI_COUNT=$((2 ** ${CONN_CLI_COUNT_POWER})) + + run_etcd_server + CURRENT_ETCD_PID=$! + sleep 5 + + init_etcd_db + + START=$(date +%s) + LINE="${RATIO},${CONN_CLI_COUNT},${VALUE_SIZE}" + echo -n "run with setting [${LINE}]" + for i in $(seq ${REPEAT_COUNT}); do + echo -n "." + QPS=$(${ETCD_BM_BIN} txn-mixed "" \ + --conns=${CONN_CLI_COUNT} --clients=${CONN_CLI_COUNT} \ + --total=${RUN_COUNT} \ + --endpoints "http://127.0.0.1:${CLIENT_PORT}" \ + --rw-ratio ${RATIO} --limit ${RANGE_RESULT_LIMIT} \ + 2>/dev/null | grep "Requests/sec" | awk "{print \$2}") + if [ $? -ne 0 ]; then + echo "benchmark command failed: $?" 
+ quit -1 + fi + RD_QPS=$(echo -e "${QPS}" | sed -n '1 p') + WR_QPS=$(echo -e "${QPS}" | sed -n '2 p') + LINE="${LINE},${RD_QPS}:${WR_QPS}" + done + END=$(date +%s) + DIFF=$((${END} - ${START})) + echo "took ${DIFF} seconds" + + cat >>"${OUTPUT_FILE}" < /dev/null