tool: save test parameters in mixed read/write benchmark script.

This commit is contained in:
Wilson Wang 2021-05-26 11:48:36 -07:00
parent 71934ff244
commit 8389ab8751
2 changed files with 168 additions and 27 deletions

View File

@ -1,13 +1,12 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import six
import sys import sys
import os import os
import argparse import argparse
import logging import logging
import pandas as pd import pandas as pd
import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D import matplotlib.colors as colors
logging.basicConfig(format='[%(levelname)s %(asctime)s %(name)s] %(message)s') logging.basicConfig(format='[%(levelname)s %(asctime)s %(name)s] %(message)s')
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -24,6 +23,9 @@ def parse_args():
help='second input data files in csv format. (optional)') help='second input data files in csv format. (optional)')
parser.add_argument('-t', '--title', dest='title', type=str, required=True, parser.add_argument('-t', '--title', dest='title', type=str, required=True,
help='plot graph title string') help='plot graph title string')
parser.add_argument('-z', '--zero-centered', dest='zero', type=bool, required=False,
help='plot the improvement graph with white color represents 0.0',
default=True)
parser.add_argument('-o', '--output-image', dest='output', type=str, required=True, parser.add_argument('-o', '--output-image', dest='output', type=str, required=True,
help='output image filename') help='output image filename')
return parser.parse_args() return parser.parse_args()
@ -42,33 +44,142 @@ def load_data_files(*args):
res = [] res = []
try: try:
for df in df_list: for df in df_list:
new_df = df[['ratio', 'conn_size', 'value_size']].copy() param_df = df[df['type'] == 'PARAM']
param_str = ''
if len(param_df) != 0:
param_str = param_df['comment'].iloc[0]
new_df = df[df['type'] == 'DATA'][['ratio', 'conn_size', 'value_size']].copy()
cols = [x for x in df.columns if x.find('iter') != -1] cols = [x for x in df.columns if x.find('iter') != -1]
tmp = [df[x].str.split(':') for x in cols] tmp = [df[df['type'] == 'DATA'][x].str.split(':') for x in cols]
read_df = [x.apply(lambda x: float(x[0])) for x in tmp] read_df = [x.apply(lambda x: float(x[0])) for x in tmp]
read_avg = sum(read_df)/len(read_df) read_avg = sum(read_df) / len(read_df)
new_df['read'] = read_avg new_df['read'] = read_avg
write_df = [x.apply(lambda x: float(x[1])) for x in tmp] write_df = [x.apply(lambda x: float(x[1])) for x in tmp]
write_avg = sum(write_df)/len(write_df) write_avg = sum(write_df) / len(write_df)
new_df['write'] = write_avg new_df['write'] = write_avg
new_df['ratio'] = new_df['ratio'].astype(float) new_df['ratio'] = new_df['ratio'].astype(float)
new_df['conn_size'] = new_df['conn_size'].astype(int) new_df['conn_size'] = new_df['conn_size'].astype(int)
new_df['value_size'] = new_df['value_size'].astype(int) new_df['value_size'] = new_df['value_size'].astype(int)
res.append(new_df) res.append({
'dataframe': new_df,
'param': param_str
})
except Exception as e: except Exception as e:
logger.error(str(e)) logger.error(str(e))
sys.exit(1) sys.exit(1)
return res return res
# Backport of matplotlib's CenteredNorm for early matplotlib versions that do
# not ship the class. Adapted from the matplotlib source code; unlike a verbatim
# copy, it initialises the Normalize base class first so that the vmin/vmax
# assignments below also work on matplotlib versions where those attributes are
# properties relying on state created by Normalize.__init__.
class CenteredNorm(colors.Normalize):
    def __init__(self, vcenter=0, halfrange=None, clip=False):
        """
        Normalize symmetrical data around a center (0 by default).

        Unlike `TwoSlopeNorm`, `CenteredNorm` applies an equal rate of change
        around the center.

        Useful when mapping symmetrical data around a conceptual center
        e.g., data that range from -2 to 4, with 0 as the midpoint, and
        with equal rates of change around that midpoint.

        Parameters
        ----------
        vcenter : float, default: 0
            The data value that defines ``0.5`` in the normalization.
        halfrange : float, optional
            The range of data values that defines a range of ``0.5`` in the
            normalization, so that *vcenter* - *halfrange* is ``0.0`` and
            *vcenter* + *halfrange* is ``1.0`` in the normalization.
            Defaults to the largest absolute difference to *vcenter* for
            the values in the dataset.
        clip : bool, default: False
            Passed through to `Normalize`.

        Examples
        --------
        This maps data values -2 to 0.25, 0 to 0.5, and 4 to 1.0
        (assuming equal rates of change above and below 0.0):

            >>> import matplotlib.colors as mcolors
            >>> norm = mcolors.CenteredNorm(halfrange=4.0)
            >>> data = [-2., 0., 4.]
            >>> norm(data)
            array([0.25, 0.5 , 1. ])
        """
        # Let the base class create vmin/vmax/clip (and any internal state it
        # needs) before this class touches them.
        super().__init__(vmin=None, vmax=None, clip=clip)
        # vcenter must exist before the halfrange setter runs, because the
        # setter derives vmin and vmax from it.
        self._vcenter = vcenter
        self.halfrange = halfrange

    def _set_vmin_vmax(self):
        """
        Set *vmin* and *vmax* based on *vcenter* and *halfrange*.
        """
        self.vmax = self._vcenter + self._halfrange
        self.vmin = self._vcenter - self._halfrange

    def autoscale(self, A):
        """
        Set *halfrange* to ``max(abs(A-vcenter))``, then set *vmin* and *vmax*.
        """
        A = np.asanyarray(A)
        self._halfrange = max(self._vcenter - A.min(),
                              A.max() - self._vcenter)
        self._set_vmin_vmax()

    def autoscale_None(self, A):
        """Set *vmin* and *vmax* from *A* unless a halfrange is already set."""
        A = np.asanyarray(A)
        # A.size guard: min()/max() of an empty array would raise.
        if self._halfrange is None and A.size:
            self.autoscale(A)

    @property
    def vcenter(self):
        return self._vcenter

    @vcenter.setter
    def vcenter(self, vcenter):
        self._vcenter = vcenter
        if self.vmax is not None:
            # recompute halfrange assuming vmin and vmax represent
            # min and max of data
            self._halfrange = max(self._vcenter - self.vmin,
                                  self.vmax - self._vcenter)
            self._set_vmin_vmax()

    @property
    def halfrange(self):
        return self._halfrange

    @halfrange.setter
    def halfrange(self, halfrange):
        if halfrange is None:
            self._halfrange = None
            self.vmin = None
            self.vmax = None
        else:
            self._halfrange = abs(halfrange)
            # Keep vmin/vmax in sync immediately instead of waiting for the
            # first __call__, so the norm reports consistent limits.
            self._set_vmin_vmax()

    def __call__(self, value, clip=None):
        if self._halfrange is not None:
            # enforce symmetry, reset vmin and vmax
            self._set_vmin_vmax()
        return super().__call__(value, clip=clip)
def plot_data(title, *args): def plot_data(title, *args):
if len(args) == 1: if len(args) == 1:
figsize = (12, 16) fig_size = (12, 16)
df0 = args[0] df0 = args[0]['dataframe']
fig = plt.figure(figsize=figsize) df0param = args[0]['param']
fig = plt.figure(figsize=fig_size)
count = 0 count = 0
for val, df in df0.groupby('ratio'): for val, df in df0.groupby('ratio'):
count += 1 count += 1
@ -81,50 +192,69 @@ def plot_data(title, *args):
plt.xlabel('Connections Amount') plt.xlabel('Connections Amount')
plt.colorbar() plt.colorbar()
plt.tight_layout() plt.tight_layout()
fig.suptitle('{}\n{}'.format(title, df0param))
elif len(args) == 2: elif len(args) == 2:
figsize = (12, 26) fig_size = (12, 26)
df0 = args[0] df0 = args[0]['dataframe']
df1 = args[1] df0param = args[0]['param']
fig = plt.figure(figsize=figsize) df1 = args[1]['dataframe']
df1param = args[1]['param']
fig = plt.figure(figsize=fig_size)
count = 0 count = 0
delta_df = df1.copy() delta_df = df1.copy()
delta_df[['read', 'write']] = (df1[['read', 'write']] - df0[['read', 'write']])/df0[['read', 'write']] delta_df[['read', 'write']] = ((df1[['read', 'write']] - df0[['read', 'write']]) /
df0[['read', 'write']]) * 100
for tmp in [df0, df1, delta_df]: for tmp in [df0, df1, delta_df]:
count += 1 count += 1
count2 = count count2 = count
for val, df in tmp.groupby('ratio'): for val, df in tmp.groupby('ratio'):
plt.subplot(8, 3, count2) plt.subplot(8, 3, count2)
norm = None
if count2 % 3 == 0: if count2 % 3 == 0:
cmap_name = 'bwr' cmap_name = 'bwr'
if params.zero:
norm = CenteredNorm()
else: else:
cmap_name = 'viridis' cmap_name = 'viridis'
plt.tripcolor(df['conn_size'], df['value_size'], df['read'] + df['write'], cmap=plt.get_cmap(cmap_name)) plt.tripcolor(df['conn_size'], df['value_size'], df['read'] + df['write'],
norm=norm,
cmap=plt.get_cmap(cmap_name))
if count2 == 1: if count2 == 1:
plt.title('{}\nR/W Ratio {:.4f}'.format(os.path.basename(params.input_file_a), val)) plt.title('{}\nR/W Ratio {:.4f}'.format(
os.path.basename(params.input_file_a),
val))
elif count2 == 2: elif count2 == 2:
plt.title('{}\nR/W Ratio {:.4f}'.format(os.path.basename(params.input_file_b), val)) plt.title('{}\nR/W Ratio {:.4f}'.format(
os.path.basename(params.input_file_b),
val))
elif count2 == 3: elif count2 == 3:
plt.title('Delta\nR/W Ratio {:.4f}'.format(val)) plt.title('Gain\nR/W Ratio {:.4f}'.format(val))
else: else:
plt.title('R/W Ratio {:.4f}'.format(val)) plt.title('R/W Ratio {:.4f}'.format(val))
plt.yscale('log', base=2) plt.yscale('log', base=2)
plt.ylabel('Value Size') plt.ylabel('Value Size')
plt.xscale('log', base=2) plt.xscale('log', base=2)
plt.xlabel('Connections Amount') plt.xlabel('Connections Amount')
if count2 % 3 == 0:
plt.colorbar(format='%.2f%%')
else:
plt.colorbar() plt.colorbar()
plt.tight_layout() plt.tight_layout()
count2 += 3 count2 += 3
fig.suptitle('{}\n{} {}\n{} {}'.format(
title, os.path.basename(params.input_file_a), df0param,
os.path.basename(params.input_file_b), df1param))
else: else:
raise Exception('invalid plot input data') raise Exception('invalid plot input data')
fig.suptitle(title) fig.subplots_adjust(top=0.93)
fig.subplots_adjust(top=0.95)
plt.savefig(params.output) plt.savefig(params.output)
def plot_data_3d(df, title): def plot_data_3d(df, title):
fig = plt.figure(figsize=(10, 10)) fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(projection='3d') ax = fig.add_subplot(projection='3d')
ax.scatter(df['conn_size'], df['value_size'], 1/(1+1/df['ratio']), c=df['read'] + df['write']) ax.scatter(df['conn_size'], df['value_size'], 1 / (1 + 1 / df['ratio']), c=df['read'] + df['write'])
ax.set_title('{}'.format(title)) ax.set_title('{}'.format(title))
ax.set_zlabel('R/W Ratio') ax.set_zlabel('R/W Ratio')
ax.set_ylabel('Value Size') ax.set_ylabel('Value Size')

View File

@ -14,6 +14,8 @@ BACKEND_SIZE="$((20 * 1024 * 1024 * 1024))"
RANGE_RESULT_LIMIT=100 RANGE_RESULT_LIMIT=100
CLIENT_PORT="23790" CLIENT_PORT="23790"
COMMIT=
ETCD_ROOT_DIR="$(cd $(dirname $0) && pwd)/../.." ETCD_ROOT_DIR="$(cd $(dirname $0) && pwd)/../.."
ETCD_BIN_DIR="${ETCD_ROOT_DIR}/bin" ETCD_BIN_DIR="${ETCD_ROOT_DIR}/bin"
ETCD_BIN="${ETCD_BIN_DIR}/etcd" ETCD_BIN="${ETCD_BIN_DIR}/etcd"
@ -47,9 +49,17 @@ function check_prerequisite() {
echo "file ${OUTPUT_FILE} already exists." echo "file ${OUTPUT_FILE} already exists."
exit 1 exit 1
fi fi
pushd ${ETCD_ROOT_DIR} > /dev/null
COMMIT=$(git log --pretty=format:'%h' -n 1)
if [ $? -ne 0 ]; then
COMMIT=N/A
fi
popd > /dev/null
cat >"${OUTPUT_FILE}" <<EOF cat >"${OUTPUT_FILE}" <<EOF
ratio,conn_size,value_size$(for i in $(seq 1 ${REPEAT_COUNT});do echo -n ",iter$i"; done) type,ratio,conn_size,value_size$(for i in $(seq 1 ${REPEAT_COUNT});do echo -n ",iter$i"; done),comment
PARAM,,,$(for i in $(seq 1 ${REPEAT_COUNT});do echo -n ","; done),"key_size=${KEY_SIZE},key_space_size=${KEY_SPACE_SIZE},backend_size=${BACKEND_SIZE},range_limit=${RANGE_RESULT_LIMIT},commit=${COMMIT}"
EOF EOF
} }
function run_etcd_server() { function run_etcd_server() {
@ -97,7 +107,6 @@ function kill_etcd_server() {
sleep 5 sleep 5
} }
check_prerequisite
while getopts ":w:c:p:l:vh" OPTION; do while getopts ":w:c:p:l:vh" OPTION; do
case $OPTION in case $OPTION in
@ -128,6 +137,8 @@ while getopts ":w:c:p:l:vh" OPTION; do
done done
shift "$((${OPTIND} - 1))" shift "$((${OPTIND} - 1))"
check_prerequisite
pushd "${WORKING_DIR}" > /dev/null pushd "${WORKING_DIR}" > /dev/null
# progress stats management # progress stats management
@ -162,7 +173,7 @@ for RATIO_STR in ${RATIO_LIST}; do
init_etcd_db init_etcd_db
START=$(date +%s) START=$(date +%s)
LINE="${RATIO},${CONN_CLI_COUNT},${VALUE_SIZE}" LINE="DATA,${RATIO},${CONN_CLI_COUNT},${VALUE_SIZE}"
echo -n "run with setting [${LINE}]" echo -n "run with setting [${LINE}]"
for i in $(seq ${REPEAT_COUNT}); do for i in $(seq ${REPEAT_COUNT}); do
echo -n "." echo -n "."