Skip to content

Commit

Permalink
add ability to produce iops and latency charts
Browse files Browse the repository at this point in the history
  • Loading branch information
pcuzner committed Aug 16, 2016
1 parent 3805bf2 commit 000c36d
Show file tree
Hide file tree
Showing 3 changed files with 163 additions and 55 deletions.
89 changes: 69 additions & 20 deletions reporting/fio_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,38 +46,86 @@ def format_perf_data(perf_data):

return perf_data

def aggregate_data(data_in, aggr_type='iops'):
    """Collapse per-job data series into a single 'Aggregated Data' series.

    :param data_in: dict mapping a job/series name to a list of observations.
                    Lists are expected to be padded to a common length (see
                    format_perf_data), with None marking empty slots.
    :param aggr_type: 'iops'    -> sum the values at each observation point
                      'latency' -> average the non-None values at each point
    :return: dict with the single key 'Aggregated Data' -> list of values
    """
    aggr_data = {}
    summary_data = []
    max_element = get_max_listsize(data_in)

    for ptr in range(0, max_element):
        # gather the values every job reported for this observation slot,
        # skipping the None padding entries
        data_points = [data_in[key][ptr] for key in data_in
                       if data_in[key][ptr] is not None]

        if aggr_type == 'iops':
            summary_data.append(sum(data_points))
        elif aggr_type == 'latency':
            # average across only the jobs that reported a value; guard the
            # divide so an all-None slot doesn't raise ZeroDivisionError -
            # keep None so the series length still matches the x axis.
            # (The stray debug print that was here has been removed.)
            if data_points:
                summary_data.append(sum(data_points) / float(len(data_points)))
            else:
                summary_data.append(None)

    aggr_data['Aggregated Data'] = summary_data
    return aggr_data

def main(options):
    """Collect fio json results and render them as an iops/latency chart.

    Scans options.fio_file_path for fio json output files, extracts the
    series named by options.json_key from each, pads the series to a common
    length, optionally aggregates them, and writes a chart to
    options.output_file.

    NOTE(review): the pasted source contained both the pre- and post-change
    bodies of this function (diff residue); the old body scanned the files
    twice and called FIOPlot() without the now-mandatory chart_type argument,
    which would raise TypeError. Only the current flow is kept here.
    """

    perf_data = {}

    # "none" on the command line disables the ceiling reference line
    chart_ceiling = None if options.ceiling == "none" else options.ceiling

    json_file_list = get_files(options.fio_file_path)

    if json_file_list:
        for f in json_file_list:
            perf_sample = fio_parse.get_json_data(json_file=f,
                                                  json_path=options.json_key)
            if perf_sample['status'] == 'OK':
                del perf_sample['status']
                for key in perf_sample:
                    if key in perf_data:
                        perf_data[key].append(perf_sample[key])
                    else:
                        perf_data[key] = [perf_sample[key]]

        # need to add padding to the data to make each entry have the same
        # number of observations
        fmtd_data = format_perf_data(perf_data)

        if options.data_aggregate:
            # collapse the per-job series into one aggregate series
            fmtd_data = aggregate_data(fmtd_data, options.chart_type)

        chart = FIOPlot(chart_type=options.chart_type,
                        data=fmtd_data,
                        title=options.title,
                        ceiling=chart_ceiling,
                        xlabel='Concurrent jobs',
                        ylabel=options.ylabel)

        chart.generate_plot(options.output_file)

        print(fmtd_data)
    else:
        print("no files found matching the path provided %s" % options.fio_file_path)


if __name__ == "__main__":
usage_info = "usage: %prog [options]"
usage_info = "Usage: %prog [options]"

parser = OptionParser(usage=usage_info, version="%prog 0.1")
parser.add_option("-y", "--yaxis-label", dest="ylabel", action="store",
default="Response Time (ms)",
help="Chart label for the yaxis")
parser.add_option("-T", "--chart-type", dest="chart_type", action="store",
choices=['iops','latency'],default='latency',
help="chart type - either iops or [latency]")
parser.add_option("-a", "--aggr", dest="data_aggregate", action="store_true",
default=False,
help="aggregate the iops or latency data, instead of per job data")
parser.add_option("-D", "--debug", dest="debug", action="store_true",
default=False,
help="turn on debug output")
parser.add_option("-c", "--ceiling", dest="ceiling", action="store",
default=50000,
help="(int) ceiling to show Max acceptable values, or none")
parser.add_option("-p", "--pathname", dest="fio_file_path", action="store",
help="file name/path containing fio json output")
parser.add_option("-k", "--keyname", dest="json_key", action="store",
Expand All @@ -88,6 +136,7 @@ def main(options):
help="output filename", default="myfile.png")

(options, args) = parser.parse_args()

if options.fio_file_path and options.json_key:
main(options)
else:
Expand Down
2 changes: 1 addition & 1 deletion reporting/fio_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def extract_json_data(json_filename):


def get_json_data(json_file, json_path):

print 'processing file %s' % json_file
json_data = extract_json_data(json_file)
status_msg = 'OK'
response = {}
Expand Down
127 changes: 93 additions & 34 deletions reporting/fio_plot.py
Original file line number Diff line number Diff line change
@@ -1,71 +1,130 @@
__author__ = 'paul'

# todo


import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
from textwrap import wrap

def convert_2_ms(x, p):
    """matplotlib tick formatter: render a usec axis value as whole ms.

    The second argument is the tick position, required by FuncFormatter
    but unused here.
    """
    in_ms = x / 1000
    return "%d" % in_ms

class FIOPlot(object):
    """Render a dict of fio iops/latency series as a line chart PNG.

    NOTE(review): the pasted source contained both the pre- and post-change
    versions of this class interleaved (diff residue): two __init__
    definitions, a duplicated ax.plot call, a hard-coded
    plt.ylim(ymin=0,ymax=150000) contradicting the computed y_maximum, and
    duplicated legend/grid/savefig calls. Only the current version is kept.
    """

    def __init__(self, chart_type, data, ceiling=50000, title='', xlabel='', ylabel=''):
        """
        :param chart_type: 'iops' or 'latency' - controls y axis formatting
        :param data: dict of series-name -> list of observations
        :param ceiling: value for a red dashed reference line, or None to omit
        :param title: chart title (wrapped at 60 chars)
        :param xlabel, ylabel: axis labels
        """
        mpl.rcParams['figure.facecolor'] = 'white'   # interactive chart
        mpl.rcParams['savefig.facecolor'] = 'white'  # saved chart colour
        self.chart_type = chart_type
        self.dataset = data                          # dict expected from the caller
        self.num_entries = len(data)

        if 'Aggregated Data' in self.dataset:
            # aggregated data received, so define the xaxis by the number of
            # observations in that single series
            self.xseries = range(1, (len(self.dataset['Aggregated Data']) + 1), 1)
        else:
            self.xseries = range(1, (self.num_entries + 1), 1)

        if ceiling is not None:
            # flat reference line, one point per x position
            self.dataset['Ceiling'] = [ceiling] * len(self.xseries)

        self.title = "\n".join(wrap(title, 60))
        self.xlabel = xlabel
        self.ylabel = ylabel

    def generate_plot(self, filename):
        """Plot every series in self.dataset and save the chart to filename."""
        fig, ax = plt.subplots()

        # num_cols defines the no. columns in the legend. matplotlib will split
        # the legend entries across this number of columns as evenly as possible
        num_cols = (len(self.dataset) // 16) + 1

        # determine the max y axis value by looking at the data, excluding the
        # flat 'Ceiling' line so it can't dominate the scale.
        # BUG FIX: the original used "key is not 'Ceiling'" - a string
        # identity test whose result is implementation defined; use != instead.
        y_values = []
        for key in self.dataset:
            if key != "Ceiling":
                y_values += self.dataset[key]

        y_maximum = max(y_values) * 1.2   # 20% headroom above the data

        plt.ylim(0, y_maximum)

        fig.set_size_inches(13, 8)
        if self.num_entries > 20:
            # too many entries to label each one - tick every 5th position
            x_major = np.arange(0, len(self.dataset) + 1, 5)
        else:
            x_major = self.xseries

        # replace the first label since our data series starts at 1 i.e. 1 job
        x_major[0] = 1
        x_minor = np.arange(0, len(self.dataset), 1)

        ax.set_xticks(x_major)
        ax.set_xticks(x_minor, minor=True)
        ax.get_xaxis().set_tick_params(which='both', direction='out')
        ax.grid(which='minor', alpha=0.5)   # minor grid more faint than major grid

        plot_color = iter(plt.cm.Set1(np.linspace(0, 1, len(self.xseries) + 1)))
        for key in sorted(self.dataset):
            c = next(plot_color)
            lwidth = 1
            plot_marker = None
            lstyle = 'solid'
            if key.startswith('Ceiling'):
                # ceiling drawn as a thicker red dashed reference line
                lwidth = 2
                lstyle = 'dashed'
                c = 'r'
            elif key.startswith('Aggregated'):
                # aggregate series drawn in blue with point markers
                plot_marker = '.'
                c = 'b'

            ax.plot(self.xseries,
                    self.dataset[key],
                    ls=lstyle,
                    marker=plot_marker,
                    markersize=10,
                    c=c,
                    linewidth=lwidth,
                    label=key)

        plt.title(self.title)

        plt.tick_params(axis='x', which='both', bottom='on', top='off', labelbottom='on')
        plt.tick_params(axis='y', right='off', direction='out', which='both')

        if self.chart_type == 'latency':
            # latency data is provided in usec, so convert the tick labels to ms
            ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(convert_2_ms))
            if y_maximum < 10000:
                # small range - add minor ticks for readability
                ax.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(200))

        plt.ylabel(self.ylabel)
        plt.xlabel(self.xlabel)

        # shrink the axes horizontally to leave room for the legend on the right
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=num_cols, frameon=False)

        # set the font size in the legend to 10
        plt.setp(plt.gca().get_legend().get_texts(), fontsize='10')
        plt.grid()                 # show the grid
        plt.savefig(filename)      # save the graph to a file

0 comments on commit 000c36d

Please sign in to comment.