def aggregate_data(data_in, aggr_type='iops'):
    """Collapse per-job series into a single 'Aggregated Data' series.

    data_in is a dict mapping a series name to a list of observations, where
    None marks a missing observation. For each list position the non-None
    values from every series are combined:

    - 'iops'    : the values are summed (0 when nothing was observed)
    - 'latency' : the values are averaged (None when nothing was observed)

    Returns a dict with the single key 'Aggregated Data'.
    Raises ValueError when aggr_type is neither 'iops' nor 'latency'.
    """
    if aggr_type not in ('iops', 'latency'):
        raise ValueError("unsupported aggr_type %r - expected 'iops' or "
                         "'latency'" % (aggr_type,))

    series_list = list(data_in.values())
    # the longest series defines how many points the summary will contain
    max_element = max(len(series) for series in series_list) if series_list else 0

    summary_data = []
    for ptr in range(max_element):
        # tolerate ragged input - a series shorter than the longest one is
        # treated as having no observation at this position
        data_points = [series[ptr] for series in series_list
                       if ptr < len(series) and series[ptr] is not None]

        if aggr_type == 'iops':
            summary_data.append(sum(data_points))
        elif data_points:
            summary_data.append(sum(data_points) / float(len(data_points)))
        else:
            # nothing to average - keep the slot so the x axis stays aligned
            summary_data.append(None)

    return {'Aggregated Data': summary_data}


def _parse_ceiling(raw_ceiling):
    """Turn the --ceiling option value into a number, or None to disable it.

    optparse hands back a string when the option is given on the command
    line, and the numeric default otherwise, so both forms are accepted.
    Raises ValueError when the value is neither an integer nor 'none'.
    """
    if raw_ceiling is None:
        return None
    if isinstance(raw_ceiling, (int, float)):
        return raw_ceiling

    text = str(raw_ceiling).strip()
    if text.lower() == "none":
        return None
    try:
        return int(text)
    except ValueError:
        raise ValueError("ceiling must be an integer or 'none', got %r"
                         % (raw_ceiling,))


def main(options):
    """Read the fio json files, optionally aggregate them, and plot a chart.

    options is the optparse result; the attributes read here are
    fio_file_path, json_key, ceiling, data_aggregate, chart_type, title,
    ylabel and output_file.
    """
    chart_ceiling = _parse_ceiling(options.ceiling)

    json_file_list = get_files(options.fio_file_path)
    if not json_file_list:
        print("no files found matching the path provided %s" % options.fio_file_path)
        return

    perf_data = {}
    for f in json_file_list:
        perf_sample = fio_parse.get_json_data(json_file=f, json_path=options.json_key)
        if perf_sample['status'] != 'OK':
            # report the file that was dropped instead of ignoring it silently
            print("skipping file %s - status %s" % (f, perf_sample['status']))
            continue

        del perf_sample['status']
        for key in perf_sample:
            perf_data.setdefault(key, []).append(perf_sample[key])

    if not perf_data:
        # every file was rejected, so there is nothing to chart
        print("no usable data found in the files matching %s" % options.fio_file_path)
        return

    # need to add padding to the data to make each entry have the same
    # number of observations
    fmtd_data = format_perf_data(perf_data)

    if options.data_aggregate:
        fmtd_data = aggregate_data(fmtd_data, options.chart_type)

    chart = FIOPlot(chart_type=options.chart_type,
                    data=fmtd_data,
                    title=options.title,
                    ceiling=chart_ceiling,
                    xlabel='Concurrent jobs',
                    ylabel=options.ylabel)

    chart.generate_plot(options.output_file)

    print(fmtd_data)
def convert_2_ms(x, p):
    """Format a tick value given in usec as a whole number of ms.

    The (value, position) signature is the one matplotlib's FuncFormatter
    calls; the position argument p is not used.
    """
    return "%d" % (x/1000)


class FIOPlot(object):
    """Line chart of fio results, one line per series in the dataset.

    The dataset is a dict mapping a series name to a list of observations
    (None marks a missing observation). An optional horizontal 'Ceiling'
    series is added to show the maximum acceptable value.
    """

    def __init__(self, chart_type, data, ceiling=50000, title='', xlabel='', ylabel=''):
        """Store the chart settings and derive the x axis from the data.

        chart_type -- 'latency' switches the y axis labels from usec to ms
        data       -- dict of series name -> list of observations
        ceiling    -- value for the 'Ceiling' line, or None to omit it
        title      -- chart title, wrapped at 60 characters
        xlabel     -- label for the x axis
        ylabel     -- label for the y axis
        """
        mpl.rcParams['figure.facecolor'] = 'white'      # interactive chart
        mpl.rcParams['savefig.facecolor'] = 'white'     # saved chart colour

        self.chart_type = chart_type
        # work on a shallow copy so adding 'Ceiling' does not alter the
        # dict owned by the caller
        self.dataset = dict(data)

        # the x axis is defined by the number of observations per series,
        # not by the number of series in the dict
        self.num_entries = self._longest_series(self.dataset)
        self.xseries = list(range(1, self.num_entries + 1))

        if ceiling is not None:
            self.dataset['Ceiling'] = [ceiling] * len(self.xseries)

        self.title = "\n".join(wrap(title, 60))
        self.xlabel = xlabel
        self.ylabel = ylabel

    @staticmethod
    def _longest_series(dataset):
        """Return the length of the longest series, or 0 for an empty dict."""
        if not dataset:
            return 0
        return max(len(series) for series in dataset.values())

    def _y_axis_limit(self):
        """Return 120% of the largest observed value, or None if there is none.

        The 'Ceiling' series and None padding are left out so the axis is
        scaled by the measured data only.
        """
        y_values = []
        for key in self.dataset:
            if key != 'Ceiling':
                y_values += [v for v in self.dataset[key] if v is not None]

        if not y_values:
            return None
        return max(y_values) * 1.2

    def generate_plot(self, filename):
        """Draw the chart and save it to filename."""
        fig, ax = plt.subplots()
        num_points = len(self.xseries)

        # num_cols defines the no. columns in the legend. matplotlib will split the legend
        # entries across this number of columns as evenly as possible
        num_cols = (len(self.dataset) // 16) + 1

        # determine the max y axis value by looking at the data
        y_maximum = self._y_axis_limit()
        if y_maximum is not None:
            plt.ylim(0, y_maximum)
        fig.set_size_inches(13, 8)

        if num_points > 20:
            x_major = np.arange(0, num_points + 1, 5)
            # replace the first label since our data series starts at 1 i.e. 1 job
            x_major[0] = 1
        else:
            # copy, so the tick list never aliases the x series itself
            x_major = list(self.xseries)
        x_minor = np.arange(0, num_points + 1, 1)

        ax.set_xticks(x_major)
        ax.set_xticks(x_minor, minor=True)
        ax.get_xaxis().set_tick_params(which='both', direction='out')
        ax.grid(which='minor', alpha=0.5)   # minor grid more faint than major grid

        # one colour per series, so the iterator cannot run dry in the loop
        plot_color = iter(plt.cm.Set1(np.linspace(0, 1, len(self.dataset))))
        for key in sorted(self.dataset):
            c = next(plot_color)
            lwidth = 1
            plot_marker = None
            lstyle = 'solid'
            if key.startswith('Ceiling'):
                lwidth = 2
                lstyle = 'dashed'
                c = 'r'
            elif key.startswith('Aggregated'):
                plot_marker = '.'
                c = 'b'

            # pad a short series with None so it lines up with the x axis
            series = list(self.dataset[key])
            series += [None] * (num_points - len(series))

            ax.plot(self.xseries,
                    series,
                    ls=lstyle,
                    marker=plot_marker,
                    markersize=10,
                    c=c,
                    linewidth=lwidth,
                    label=key)

        plt.title(self.title)
        plt.tick_params(axis='x', which='both', bottom=True, top=False, labelbottom=True)
        plt.tick_params(axis='y', right=False, direction='out', which='both')

        if self.chart_type == 'latency':
            # latency data is provided in usec, so we need to convert to ms
            ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(convert_2_ms))
            if y_maximum is not None and y_maximum < 10000:
                ax.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(200))

        plt.xlabel(self.xlabel)
        plt.ylabel(self.ylabel)

        # narrow the plot area to leave room for the legend on the right
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width*0.8, box.height])
        legend = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5),
                           ncol=num_cols, frameon=False)

        # set the font size in the legend to 10
        plt.setp(legend.get_texts(), fontsize='10')
        plt.grid()                  # show the grid
        plt.savefig(filename)       # save the graph to a file