tools: add mixed read-write performance evaluation scripts

Wilson Wang 2021-05-22 13:56:02 -07:00
parent 6c72c1b09c
commit 79b2777482
4 changed files with 516 additions and 0 deletions


@@ -0,0 +1,152 @@
// Copyright 2021 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"context"
"encoding/binary"
"fmt"
"math"
"math/rand"
"os"
"time"
v3 "go.etcd.io/etcd/client/v3"
"go.etcd.io/etcd/pkg/v3/report"
"github.com/spf13/cobra"
"golang.org/x/time/rate"
"gopkg.in/cheggaaa/pb.v1"
)
// mixedTxnCmd represents the txn-mixed command
var mixedTxnCmd = &cobra.Command{
Use: "txn-mixed key [end-range]",
Short: "Benchmark a mixed load of txn-put & txn-range.",
Run: mixedTxnFunc,
}
var (
mixedTxnTotal int
mixedTxnRate int
mixedTxnReadWriteRatio float64
mixedTxnRangeLimit int64
mixedTxnEndKey string
writeOpsTotal uint64
readOpsTotal uint64
)
func init() {
RootCmd.AddCommand(mixedTxnCmd)
mixedTxnCmd.Flags().IntVar(&keySize, "key-size", 8, "Key size of mixed txn")
mixedTxnCmd.Flags().IntVar(&valSize, "val-size", 8, "Value size of mixed txn")
mixedTxnCmd.Flags().IntVar(&mixedTxnRate, "rate", 0, "Maximum txns per second (0 is no limit)")
mixedTxnCmd.Flags().IntVar(&mixedTxnTotal, "total", 10000, "Total number of txn requests")
mixedTxnCmd.Flags().StringVar(&mixedTxnEndKey, "end-key", "",
"Read operation range end key. By default, we do full range query with the default limit of 1000.")
mixedTxnCmd.Flags().Int64Var(&mixedTxnRangeLimit, "limit", 1000, "Read operation range result limit")
mixedTxnCmd.Flags().IntVar(&keySpaceSize, "key-space-size", 1, "Maximum possible keys")
mixedTxnCmd.Flags().StringVar(&rangeConsistency, "consistency", "l", "Linearizable(l) or Serializable(s)")
mixedTxnCmd.Flags().Float64Var(&mixedTxnReadWriteRatio, "rw-ratio", 1, "Read/write ops ratio")
}
type request struct {
isWrite bool
op v3.Op
}
func mixedTxnFunc(cmd *cobra.Command, args []string) {
if keySpaceSize <= 0 {
fmt.Fprintf(os.Stderr, "expected positive --key-space-size, got (%v)", keySpaceSize)
os.Exit(1)
}
if rangeConsistency == "l" {
fmt.Println("bench with linearizable range")
} else if rangeConsistency == "s" {
fmt.Println("bench with serializable range")
} else {
fmt.Fprintln(os.Stderr, cmd.Usage())
os.Exit(1)
}
requests := make(chan request, totalClients)
if mixedTxnRate == 0 {
mixedTxnRate = math.MaxInt32
}
limit := rate.NewLimiter(rate.Limit(mixedTxnRate), 1)
clients := mustCreateClients(totalClients, totalConns)
k, v := make([]byte, keySize), string(mustRandBytes(valSize))
bar = pb.New(mixedTxnTotal)
bar.Format("Bom !")
bar.Start()
reportRead := newReport()
reportWrite := newReport()
for i := range clients {
wg.Add(1)
go func(c *v3.Client) {
defer wg.Done()
for req := range requests {
limit.Wait(context.Background())
st := time.Now()
_, err := c.Txn(context.TODO()).Then(req.op).Commit()
if req.isWrite {
reportWrite.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
} else {
reportRead.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
}
bar.Increment()
}
}(clients[i])
}
go func() {
for i := 0; i < mixedTxnTotal; i++ {
var req request
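			// Pick a range read with probability ratio/(1+ratio) and a put otherwise,
			// so that the observed reads:writes converges to the requested --rw-ratio.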
if rand.Float64() < mixedTxnReadWriteRatio/(1+mixedTxnReadWriteRatio) {
opts := []v3.OpOption{v3.WithRange(mixedTxnEndKey)}
if rangeConsistency == "s" {
opts = append(opts, v3.WithSerializable())
}
opts = append(opts, v3.WithPrefix(), v3.WithLimit(mixedTxnRangeLimit))
req.op = v3.OpGet("", opts...)
req.isWrite = false
readOpsTotal++
} else {
binary.PutVarint(k, int64(i%keySpaceSize))
req.op = v3.OpPut(string(k), v)
req.isWrite = true
writeOpsTotal++
}
requests <- req
}
close(requests)
}()
rcRead := reportRead.Run()
rcWrite := reportWrite.Run()
wg.Wait()
close(reportRead.Results())
close(reportWrite.Results())
bar.Finish()
fmt.Printf("Total Read Ops: %d\nDetails:", readOpsTotal)
fmt.Println(<-rcRead)
fmt.Printf("Total Write Ops: %d\nDetails:", writeOpsTotal)
fmt.Println(<-rcWrite)
}


@@ -0,0 +1,26 @@
# etcd/tools/rw-heatmaps
`etcd/tools/rw-heatmaps` is the mixed read/write performance evaluation tool for etcd clusters.
## Execute
### Benchmark
To get a mixed read/write performance evaluation result:
```sh
# run with default configurations and specify the working directory
./rw-benchmark.sh -w ${WORKING_DIR}
```
`rw-benchmark.sh` automatically uses the etcd binary compiled under the `etcd/bin/` directory and the benchmark binary compiled under `etcd/tools/benchmark/`.
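Each measurement is a single invocation of the new `benchmark txn-mixed` subcommand. As a rough sketch of what the script runs for one parameter combination (the endpoint, counts, and flag values below are only example defaults taken from the script, not requirements):
```sh
# illustrative single run; rw-benchmark.sh sweeps ratio, connections and value size automatically
./tools/benchmark/benchmark txn-mixed "" \
  --conns=64 --clients=64 \
  --total=200000 \
  --endpoints "http://127.0.0.1:23790" \
  --rw-ratio 4 --limit 100
```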
Note: the result CSV file is saved to the directory from which the script is invoked, while the etcd database is stored in the working directory specified with `-w`. Using a separate working directory is intended for scenarios where the database should live on a different mounted disk.
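The CSV contains one row per combination of R/W ratio, connection count, and value size; the numbered columns each hold one repeat's result as a `read_qps:write_qps` pair. A hypothetical excerpt (values made up) looks like:
```
ratio,conn_size,value_size,1,2,3,4,5
4.00,64,1024,35210.1:8721.4,34980.7:8650.2,35555.3:8803.9,35102.8:8699.5,35411.6:8752.0
```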
### Plot Graph
To generate an image based on the benchmark result CSV file:
```sh
# to generate an image from one data CSV file
./plot_data.py ${FIRST_CSV_FILE} -t ${IMAGE_TITLE} -o ${OUTPUT_IMAGE_NAME}
# to generate an image comparing two data CSV files
./plot_data.py ${FIRST_CSV_FILE} ${SECOND_CSV_FILE} -t ${IMAGE_TITLE} -o ${OUTPUT_IMAGE_NAME}
```
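For example, to compare the CSVs from two `rw-benchmark.sh` runs (the script names its output `result-<timestamp>.csv` in the invocation directory), an invocation along these lines can be used; the file names and title are placeholders:
```sh
# hypothetical comparison of a baseline run against a patched run
./plot_data.py result-202105221200.csv result-202105221830.csv \
  -t "baseline vs patched" \
  -o rw-heatmap-compare.png
```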

tools/rw-heatmaps/plot_data.py (executable file, 143 lines)

@@ -0,0 +1,143 @@
#!/usr/bin/env python3
import sys
import os
import argparse
import logging
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
logging.basicConfig(format='[%(levelname)s %(asctime)s %(name)s] %(message)s')
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
params = None
def parse_args():
parser = argparse.ArgumentParser(description='plot graph using mixed read/write result file.')
parser.add_argument('input_file_a', type=str,
help='first input data files in csv format. (required)')
parser.add_argument('input_file_b', type=str, nargs='?',
help='second input data files in csv format. (optional)')
parser.add_argument('-t', '--title', dest='title', type=str, required=True,
help='plot graph title string')
parser.add_argument('-o', '--output-image', dest='output', type=str, required=True,
help='output image filename')
return parser.parse_args()
def load_data_files(*args):
df_list = []
try:
for i in args:
if i is not None:
logger.debug('loading csv file {}'.format(i))
df_list.append(pd.read_csv(i))
except FileNotFoundError as e:
logger.error(str(e))
sys.exit(1)
res = []
try:
for df in df_list:
new_df = df[['ratio', 'conn_size', 'value_size']].copy()
            # columns '1'..'5' each hold one repeat's result as a "read_qps:write_qps" string
            tmp = [df[x].str.split(':') for x in ['1', '2', '3', '4', '5']]
            read_df = [col.apply(lambda pair: float(pair[0])) for col in tmp]
            read_avg = sum(read_df) / len(read_df)
            new_df['read'] = read_avg
            write_df = [col.apply(lambda pair: float(pair[1])) for col in tmp]
            write_avg = sum(write_df) / len(write_df)
new_df['write'] = write_avg
new_df['ratio'] = new_df['ratio'].astype(float)
new_df['conn_size'] = new_df['conn_size'].astype(int)
new_df['value_size'] = new_df['value_size'].astype(int)
res.append(new_df)
except Exception as e:
logger.error(str(e))
sys.exit(1)
return res
def plot_data(title, *args):
if len(args) == 1:
figsize = (12, 16)
df0 = args[0]
fig = plt.figure(figsize=figsize)
count = 0
for val, df in df0.groupby('ratio'):
count += 1
plt.subplot(4, 2, count)
plt.tripcolor(df['conn_size'], df['value_size'], df['read'] + df['write'])
plt.title('R/W Ratio {:.2f}'.format(val))
plt.yscale('log', base=2)
plt.ylabel('Value Size')
plt.xscale('log', base=2)
plt.xlabel('Connections Amount')
plt.colorbar()
plt.tight_layout()
elif len(args) == 2:
figsize = (12, 26)
df0 = args[0]
df1 = args[1]
fig = plt.figure(figsize=figsize)
count = 0
delta_df = df1.copy()
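        # relative change of the second run versus the first: (b - a) / a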
delta_df[['read', 'write']] = (df1[['read', 'write']] - df0[['read', 'write']])/df0[['read', 'write']]
for tmp in [df0, df1, delta_df]:
count += 1
count2 = count
for val, df in tmp.groupby('ratio'):
plt.subplot(8, 3, count2)
if count2 % 3 == 0:
cmap_name = 'bwr'
else:
cmap_name = 'viridis'
plt.tripcolor(df['conn_size'], df['value_size'], df['read'] + df['write'], cmap=plt.get_cmap(cmap_name))
if count2 == 1:
plt.title('{}\nR/W Ratio {:.2f}'.format(os.path.basename(params.input_file_a), val))
elif count2 == 2:
plt.title('{}\nR/W Ratio {:.2f}'.format(os.path.basename(params.input_file_b), val))
elif count2 == 3:
plt.title('Delta\nR/W Ratio {:.2f}'.format(val))
else:
plt.title('R/W Ratio {:.2f}'.format(val))
plt.yscale('log', base=2)
plt.ylabel('Value Size')
plt.xscale('log', base=2)
plt.xlabel('Connections Amount')
plt.colorbar()
plt.tight_layout()
count2 += 3
else:
raise Exception('invalid plot input data')
fig.suptitle(title)
fig.subplots_adjust(top=0.95)
plt.savefig(params.output)
def plot_data_3d(df, title):
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(projection='3d')
ax.scatter(df['conn_size'], df['value_size'], 1/(1+1/df['ratio']), c=df['read'] + df['write'])
ax.set_title('{}'.format(title))
ax.set_zlabel('R/W Ratio')
ax.set_ylabel('Value Size')
ax.set_xlabel('Connections Amount')
plt.show()
def main():
global params
logging.basicConfig()
params = parse_args()
result = load_data_files(params.input_file_a, params.input_file_b)
plot_data(params.title, *result)
if __name__ == '__main__':
main()

tools/rw-heatmaps/rw-benchmark.sh (executable file, 195 lines)

@@ -0,0 +1,195 @@
#!/bin/bash
#set -x
RATIO_LIST="1/128 1/8 1/4 1/2 2/1 4/1 8/1 128/1"
VALUE_SIZE_POWER_RANGE="4 10"
CONN_CLI_COUNT_POWER_RANGE="5 12"
REPEAT_COUNT=5
RUN_COUNT=200000
KEY_SIZE=256
KEY_SPACE_SIZE=$((1024 * 64))
BACKEND_SIZE="$((20 * 1024 * 1024 * 1024))"
RANGE_RESULT_LIMIT=100
CLIENT_PORT="23790"
ETCD_ROOT_DIR="$(cd $(dirname $0) && pwd)/../.."
ETCD_BIN_DIR="${ETCD_ROOT_DIR}/bin"
ETCD_BIN="${ETCD_BIN_DIR}/etcd"
ETCD_BM_BIN="${ETCD_ROOT_DIR}/tools/benchmark/benchmark"
WORKING_DIR="$(mktemp -d)"
CURRENT_DIR="$(pwd -P)"
OUTPUT_FILE="${CURRENT_DIR}/result-$(date '+%Y%m%d%H%M').csv"
trap ctrl_c INT
CURRENT_ETCD_PID=
function ctrl_c() {
# capture ctrl-c and kill server
echo "terminating..."
kill_etcd_server ${CURRENT_ETCD_PID}
exit 0
}
function quit() {
if [ ! -z ${CURRENT_ETCD_PID} ]; then
kill_etcd_server ${CURRENT_ETCD_PID}
fi
exit $1
}
function check_prerequisite() {
# check initial parameters
if [ -f "${OUTPUT_FILE}" ]; then
echo "file ${OUTPUT_FILE} already exists."
exit 1
fi
cat >"${OUTPUT_FILE}" <<EOF
ratio, conn_size, value_size, 1, 2, 3, 4, 5
EOF
}
function run_etcd_server() {
if [ ! -x ${ETCD_BIN} ]; then
echo "no etcd binary found at: ${ETCD_BIN}"
exit 1
fi
# delete existing data directories
[ -d "db" ] && rm -rf db
[ -d "default.etcd" ] && rm -rf default.etcd/
echo "start etcd server in the background"
${ETCD_BIN} --quota-backend-bytes=${BACKEND_SIZE} \
--log-level 'error' \
--listen-client-urls http://0.0.0.0:${CLIENT_PORT} \
--advertise-client-urls http://127.0.0.1:${CLIENT_PORT} \
&>/dev/null &
return $!
}
function init_etcd_db() {
#initialize etcd database
if [ ! -x ${ETCD_BM_BIN} ]; then
echo "no etcd benchmark binary found at: ${ETCD_BM_BIN}"
quit -1
fi
echo "initialize etcd database..."
${ETCD_BM_BIN} put --sequential-keys \
--key-space-size=${KEY_SPACE_SIZE} \
--val-size=${VALUE_SIZE} --key-size=${KEY_SIZE} \
--endpoints http://127.0.0.1:${CLIENT_PORT} \
--total=${KEY_SPACE_SIZE} \
&>/dev/null
}
function kill_etcd_server() {
# kill etcd server
ETCD_PID=$1
if [ -z "$(ps aux | grep etcd | awk "{print \$2}")" ]; then
echo "failed to find the etcd instance to kill: ${ETCD_PID}"
return
fi
echo "kill etcd server instance"
kill -9 ${ETCD_PID}
wait ${ETCD_PID} 2>/dev/null
sleep 5
}
check_prerequisite
while getopts ":w:c:p:l:vh" OPTION; do
case $OPTION in
h)
echo "usage: $(basename $0) [-h] [-w WORKING_DIR] [-c RUN_COUNT] [-p PORT] [-l RANGE_QUERY_LIMIT] [-v]" >&2
exit 1
;;
w)
WORKING_DIR="${OPTARG}"
;;
c)
RUN_COUNT="${OPTARG}"
;;
p)
CLIENT_PORT="${OPTARG}"
;;
v)
set -x
;;
l)
RANGE_RESULT_LIMIT="${OPTARG}"
;;
\?)
echo "usage: $(basename $0) [-h] [-w WORKING_DIR] [-c RUN_COUNT] [-p PORT] [-l RANGE_QUERY_LIMIT] [-v]" >&2
exit 1
;;
esac
done
shift "$((${OPTIND} - 1))"
pushd "${WORKING_DIR}" > /dev/null
# progress stats management
ITER_TOTAL=$(($(echo ${RATIO_LIST} | wc | awk "{print \$2}") * \
$(seq ${VALUE_SIZE_POWER_RANGE} | wc | awk "{print \$2}") * \
$(seq ${CONN_CLI_COUNT_POWER_RANGE} | wc | awk "{print \$2}")))
ITER_CURRENT=0
PERCENTAGE_LAST_PRINT=0
PERCENTAGE_PRINT_THRESHOLD=5
for RATIO_STR in ${RATIO_LIST}; do
RATIO=$(echo "scale=2; ${RATIO_STR}" | bc -l)
for VALUE_SIZE_POWER in $(seq ${VALUE_SIZE_POWER_RANGE}); do
VALUE_SIZE=$((2 ** ${VALUE_SIZE_POWER}))
for CONN_CLI_COUNT_POWER in $(seq ${CONN_CLI_COUNT_POWER_RANGE}); do
# progress stats management
ITER_CURRENT=$((${ITER_CURRENT} + 1))
PERCENTAGE_CURRENT=$(echo "scale=3; ${ITER_CURRENT}/${ITER_TOTAL}*100" | bc -l)
if [ "$(echo "${PERCENTAGE_CURRENT} - ${PERCENTAGE_LAST_PRINT} > ${PERCENTAGE_PRINT_THRESHOLD}" |
bc -l)" -eq 1 ]; then
PERCENTAGE_LAST_PRINT=${PERCENTAGE_CURRENT}
echo "${PERCENTAGE_CURRENT}% completed"
fi
CONN_CLI_COUNT=$((2 ** ${CONN_CLI_COUNT_POWER}))
run_etcd_server
CURRENT_ETCD_PID=$!
sleep 5
init_etcd_db
START=$(date +%s)
LINE="${RATIO},${CONN_CLI_COUNT},${VALUE_SIZE}"
echo -n "run with setting [${LINE}]"
for i in $(seq ${REPEAT_COUNT}); do
echo -n "."
QPS=$(${ETCD_BM_BIN} txn-mixed "" \
--conns=${CONN_CLI_COUNT} --clients=${CONN_CLI_COUNT} \
--total=${RUN_COUNT} \
--endpoints "http://127.0.0.1:${CLIENT_PORT}" \
--rw-ratio ${RATIO} --limit ${RANGE_RESULT_LIMIT} \
2>/dev/null | grep "Requests/sec" | awk "{print \$2}")
        if [ -z "${QPS}" ]; then
          # no Requests/sec lines were produced, treat the run as a failure
          echo "benchmark command failed"
          quit -1
        fi
RD_QPS=$(echo -e "${QPS}" | sed -n '1 p')
WR_QPS=$(echo -e "${QPS}" | sed -n '2 p')
LINE="${LINE},${RD_QPS}:${WR_QPS}"
done
END=$(date +%s)
DIFF=$((${END} - ${START}))
echo "took ${DIFF} seconds"
cat >>"${OUTPUT_FILE}" <<EOF
${LINE}
EOF
kill_etcd_server ${CURRENT_ETCD_PID}
done
done
done
popd > /dev/null