tool: save test parameters in mixed read/write benchmark script.

2024-09-27 06:25:44 +00:00 · 2021-05-26 11:48:36 -07:00 · 2021-05-26 11:48:36 -07:00 · 8389ab8751
commit 8389ab8751
parent 71934ff244
2 changed files with 168 additions and 27 deletions
--- a/tools/rw-heatmaps/plot_data.py
+++ b/tools/rw-heatmaps/plot_data.py
@ -1,13 +1,12 @@
 #!/usr/bin/env python3
-import six
 import sys
 import os
 import argparse
 import logging
 import pandas as pd
+import numpy as np
 import matplotlib.pyplot as plt
-from mpl_toolkits.mplot3d import Axes3D
-
+import matplotlib.colors as colors

 logging.basicConfig(format='[%(levelname)s %(asctime)s %(name)s] %(message)s')
 logger = logging.getLogger(__name__)
@ -24,6 +23,9 @@ def parse_args():
                        help='second input data files in csv format. (optional)')
    parser.add_argument('-t', '--title', dest='title', type=str, required=True,
                        help='plot graph title string')
+    parser.add_argument('-z', '--zero-centered', dest='zero', type=bool, required=False,
+                        help='plot the improvement graph with white color represents 0.0',
+                        default=True)
    parser.add_argument('-o', '--output-image', dest='output', type=str, required=True,
                        help='output image filename')
    return parser.parse_args()
@ -42,33 +44,142 @@ def load_data_files(*args):
    res = []
    try:
        for df in df_list:
-            new_df = df[['ratio', 'conn_size', 'value_size']].copy()
+            param_df = df[df['type'] == 'PARAM']
+            param_str = ''
+            if len(param_df) != 0:
+                param_str = param_df['comment'].iloc[0]
+            new_df = df[df['type'] == 'DATA'][['ratio', 'conn_size', 'value_size']].copy()
            cols = [x for x in df.columns if x.find('iter') != -1]
-            tmp = [df[x].str.split(':') for x in cols]
+            tmp = [df[df['type'] == 'DATA'][x].str.split(':') for x in cols]

            read_df = [x.apply(lambda x: float(x[0])) for x in tmp]
-            read_avg = sum(read_df)/len(read_df)
+            read_avg = sum(read_df) / len(read_df)
            new_df['read'] = read_avg

            write_df = [x.apply(lambda x: float(x[1])) for x in tmp]
-            write_avg = sum(write_df)/len(write_df)
+            write_avg = sum(write_df) / len(write_df)
            new_df['write'] = write_avg

            new_df['ratio'] = new_df['ratio'].astype(float)
            new_df['conn_size'] = new_df['conn_size'].astype(int)
            new_df['value_size'] = new_df['value_size'].astype(int)
-            res.append(new_df)
+            res.append({
+                'dataframe': new_df,
+                'param': param_str
+            })
    except Exception as e:
        logger.error(str(e))
        sys.exit(1)
    return res


+# This is copied directly from matplotlib source code. Some early versions of matplotlib
+# do not have CenteredNorm class
+class CenteredNorm(colors.Normalize):
+
+    def __init__(self, vcenter=0, halfrange=None, clip=False):
+        """
+        Normalize symmetrical data around a center (0 by default).
+
+        Unlike `TwoSlopeNorm`, `CenteredNorm` applies an equal rate of change
+        around the center.
+
+        Useful when mapping symmetrical data around a conceptual center
+        e.g., data that range from -2 to 4, with 0 as the midpoint, and
+        with equal rates of change around that midpoint.
+
+        Parameters
+        ----------
+        vcenter : float, default: 0
+            The data value that defines ``0.5`` in the normalization.
+        halfrange : float, optional
+            The range of data values that defines a range of ``0.5`` in the
+            normalization, so that *vcenter* - *halfrange* is ``0.0`` and
+            *vcenter* + *halfrange* is ``1.0`` in the normalization.
+            Defaults to the largest absolute difference to *vcenter* for
+            the values in the dataset.
+
+        Examples
+        --------
+        This maps data values -2 to 0.25, 0 to 0.5, and 4 to 1.0
+        (assuming equal rates of change above and below 0.0):
+
+            >>> import matplotlib.colors as mcolors
+            >>> norm = mcolors.CenteredNorm(halfrange=4.0)
+            >>> data = [-2., 0., 4.]
+            >>> norm(data)
+            array([0.25, 0.5 , 1.  ])
+        """
+        self._vcenter = vcenter
+        self.vmin = None
+        self.vmax = None
+        # calling the halfrange setter to set vmin and vmax
+        self.halfrange = halfrange
+        self.clip = clip
+
+
+    def _set_vmin_vmax(self):
+        """
+        Set *vmin* and *vmax* based on *vcenter* and *halfrange*.
+        """
+        self.vmax = self._vcenter + self._halfrange
+        self.vmin = self._vcenter - self._halfrange
+
+    def autoscale(self, A):
+        """
+        Set *halfrange* to ``max(abs(A-vcenter))``, then set *vmin* and *vmax*.
+        """
+        A = np.asanyarray(A)
+        self._halfrange = max(self._vcenter-A.min(),
+                              A.max()-self._vcenter)
+        self._set_vmin_vmax()
+
+    def autoscale_None(self, A):
+        """Set *vmin* and *vmax*."""
+        A = np.asanyarray(A)
+        if self._halfrange is None and A.size:
+            self.autoscale(A)
+
+    @property
+    def vcenter(self):
+        return self._vcenter
+
+    @vcenter.setter
+    def vcenter(self, vcenter):
+        self._vcenter = vcenter
+        if self.vmax is not None:
+            # recompute halfrange assuming vmin and vmax represent
+            # min and max of data
+            self._halfrange = max(self._vcenter-self.vmin,
+                                  self.vmax-self._vcenter)
+            self._set_vmin_vmax()
+
+    @property
+    def halfrange(self):
+        return self._halfrange
+
+    @halfrange.setter
+    def halfrange(self, halfrange):
+        if halfrange is None:
+            self._halfrange = None
+            self.vmin = None
+            self.vmax = None
+        else:
+            self._halfrange = abs(halfrange)
+
+    def __call__(self, value, clip=None):
+        if self._halfrange is not None:
+            # enforce symmetry, reset vmin and vmax
+            self._set_vmin_vmax()
+        return super().__call__(value, clip=clip)
+
+
 def plot_data(title, *args):
    if len(args) == 1:
-        figsize = (12, 16)
-        df0 = args[0]
-        fig = plt.figure(figsize=figsize)
+        fig_size = (12, 16)
+        df0 = args[0]['dataframe']
+        df0param = args[0]['param']
+        fig = plt.figure(figsize=fig_size)
        count = 0
        for val, df in df0.groupby('ratio'):
            count += 1
@ -81,50 +192,69 @@ def plot_data(title, *args):
            plt.xlabel('Connections Amount')
            plt.colorbar()
            plt.tight_layout()
+        fig.suptitle('{}\n{}'.format(title, df0param))
    elif len(args) == 2:
-        figsize = (12, 26)
-        df0 = args[0]
-        df1 = args[1]
-        fig = plt.figure(figsize=figsize)
+        fig_size = (12, 26)
+        df0 = args[0]['dataframe']
+        df0param = args[0]['param']
+        df1 = args[1]['dataframe']
+        df1param = args[1]['param']
+        fig = plt.figure(figsize=fig_size)
        count = 0
        delta_df = df1.copy()
-        delta_df[['read', 'write']] = (df1[['read', 'write']] - df0[['read', 'write']])/df0[['read', 'write']]
+        delta_df[['read', 'write']] = ((df1[['read', 'write']] - df0[['read', 'write']]) /
+                                       df0[['read', 'write']]) * 100
        for tmp in [df0, df1, delta_df]:
            count += 1
            count2 = count
            for val, df in tmp.groupby('ratio'):
                plt.subplot(8, 3, count2)
+                norm = None
                if count2 % 3 == 0:
                    cmap_name = 'bwr'
+                    if params.zero:
+                        norm = CenteredNorm()
                else:
                    cmap_name = 'viridis'
-                plt.tripcolor(df['conn_size'], df['value_size'], df['read'] + df['write'], cmap=plt.get_cmap(cmap_name))
+                plt.tripcolor(df['conn_size'], df['value_size'], df['read'] + df['write'],
+                              norm=norm,
+                              cmap=plt.get_cmap(cmap_name))
                if count2 == 1:
-                    plt.title('{}\nR/W Ratio {:.4f}'.format(os.path.basename(params.input_file_a), val))
+                    plt.title('{}\nR/W Ratio {:.4f}'.format(
+                        os.path.basename(params.input_file_a),
+                        val))
                elif count2 == 2:
-                    plt.title('{}\nR/W Ratio {:.4f}'.format(os.path.basename(params.input_file_b), val))
+                    plt.title('{}\nR/W Ratio {:.4f}'.format(
+                        os.path.basename(params.input_file_b),
+                        val))
                elif count2 == 3:
-                    plt.title('Delta\nR/W Ratio {:.4f}'.format(val))
+                    plt.title('Gain\nR/W Ratio {:.4f}'.format(val))
                else:
                    plt.title('R/W Ratio {:.4f}'.format(val))
                plt.yscale('log', base=2)
                plt.ylabel('Value Size')
                plt.xscale('log', base=2)
                plt.xlabel('Connections Amount')
-                plt.colorbar()
+
+                if count2 % 3 == 0:
+                    plt.colorbar(format='%.2f%%')
+                else:
+                    plt.colorbar()
                plt.tight_layout()
                count2 += 3
+        fig.suptitle('{}\n{}    {}\n{}    {}'.format(
+            title, os.path.basename(params.input_file_a), df0param,
+            os.path.basename(params.input_file_b), df1param))
    else:
        raise Exception('invalid plot input data')
-    fig.suptitle(title)
-    fig.subplots_adjust(top=0.95)
+    fig.subplots_adjust(top=0.93)
    plt.savefig(params.output)


 def plot_data_3d(df, title):
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(projection='3d')
-    ax.scatter(df['conn_size'], df['value_size'], 1/(1+1/df['ratio']), c=df['read'] + df['write'])
+    ax.scatter(df['conn_size'], df['value_size'], 1 / (1 + 1 / df['ratio']), c=df['read'] + df['write'])
    ax.set_title('{}'.format(title))
    ax.set_zlabel('R/W Ratio')
    ax.set_ylabel('Value Size')
--- a/tools/rw-heatmaps/rw-benchmark.sh
+++ b/tools/rw-heatmaps/rw-benchmark.sh
@ -14,6 +14,8 @@ BACKEND_SIZE="$((20 * 1024 * 1024 * 1024))"
 RANGE_RESULT_LIMIT=100
 CLIENT_PORT="23790"

+COMMIT=
+
 ETCD_ROOT_DIR="$(cd $(dirname $0) && pwd)/../.."
 ETCD_BIN_DIR="${ETCD_ROOT_DIR}/bin"
 ETCD_BIN="${ETCD_BIN_DIR}/etcd"
@ -47,9 +49,17 @@ function check_prerequisite() {
    echo "file ${OUTPUT_FILE} already exists."
    exit 1
  fi
+  pushd ${ETCD_ROOT_DIR} > /dev/null
+  COMMIT=$(git log --pretty=format:'%h' -n 1)
+  if [ $? -ne 0 ]; then
+    COMMIT=N/A
+  fi
+  popd > /dev/null
  cat >"${OUTPUT_FILE}" <<EOF
-ratio,conn_size,value_size$(for i in $(seq 1 ${REPEAT_COUNT});do echo -n ",iter$i"; done)
+type,ratio,conn_size,value_size$(for i in $(seq 1 ${REPEAT_COUNT});do echo -n ",iter$i"; done),comment
+PARAM,,,$(for i in $(seq 1 ${REPEAT_COUNT});do echo -n ","; done),"key_size=${KEY_SIZE},key_space_size=${KEY_SPACE_SIZE},backend_size=${BACKEND_SIZE},range_limit=${RANGE_RESULT_LIMIT},commit=${COMMIT}"
 EOF
+
 }

 function run_etcd_server() {
@ -97,7 +107,6 @@ function kill_etcd_server() {
  sleep 5
 }

-check_prerequisite

 while getopts ":w:c:p:l:vh" OPTION; do
  case $OPTION in
@ -128,6 +137,8 @@ while getopts ":w:c:p:l:vh" OPTION; do
 done
 shift "$((${OPTIND} - 1))"

+check_prerequisite
+
 pushd "${WORKING_DIR}" > /dev/null

 # progress stats management
@ -162,7 +173,7 @@ for RATIO_STR in ${RATIO_LIST}; do
      init_etcd_db

      START=$(date +%s)
-      LINE="${RATIO},${CONN_CLI_COUNT},${VALUE_SIZE}"
+      LINE="DATA,${RATIO},${CONN_CLI_COUNT},${VALUE_SIZE}"
      echo -n "run with setting [${LINE}]"
      for i in $(seq ${REPEAT_COUNT}); do
        echo -n "."