diff --git a/index.html b/index.html
index b660a6f..5f82a15 100644
--- a/index.html
+++ b/index.html
@@ -6,6 +6,6 @@
 
 The page has moved to:
-OPAL example page
+OPAL example page
\ No newline at end of file
diff --git a/opal.py b/opal.py
index bd19c3d..7bc5b8a 100755
--- a/opal.py
+++ b/opal.py
@@ -304,6 +304,7 @@ def main():
     group2.add_argument('-t', '--time', help='Comma-separated runtimes in hours', required=False)
     group2.add_argument('-m', '--memory', help='Comma-separated memory usages in gigabytes', required=False)
     group2.add_argument('-d', '--desc', help='Description for HTML page', required=False)
+    group2.add_argument('-r', '--ranks', help='Highest and lowest taxonomic ranks to consider in performance rankings, comma-separated. Valid ranks: superkingdom, phylum, class, order, family, genus, species, strain (default:superkingdom,species)', required=False)
     group2.add_argument('--silent', help='Silent mode', action='store_true')
     group2.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__)
     group2.add_argument('-h', '--help', action='help', help='Show this help message and exit')
@@ -357,7 +358,7 @@ def main():
     logger.info('done')
 
     logger.info('Computing rankings...')
-    pd_rankings = rk.highscore_table(pd_metrics)
+    pd_rankings, ranks_scored = rk.highscore_table(pd_metrics, args.ranks)
     logger.info('done')
 
     if time_list or memory_list:
@@ -366,7 +367,7 @@ def main():
         logger.info('done')
 
     logger.info('Creating HTML page...')
-    html.create_html(pd_rankings, pd_metrics, labels, sample_ids_list, plots_list, output_dir, args.desc)
+    html.create_html(pd_rankings, ranks_scored, pd_metrics, labels, sample_ids_list, plots_list, output_dir, args.desc)
     logger.info('done')
 
     logger.info('OPAL finished successfully. All results have been saved to {}'.format(output_dir))
diff --git a/src/html_opal.py b/src/html_opal.py
index be1669b..0dcd03e 100644
--- a/src/html_opal.py
+++ b/src/html_opal.py
@@ -137,7 +137,7 @@ def get_formatted_pd_rankings(pd_rankings):
     return pd_show, pd_show_unsorted_pos
 
 
-def create_rankings_html(pd_rankings):
+def create_rankings_html(pd_rankings, ranks_scored):
     pd_show, pd_show_unsorted_pos = get_formatted_pd_rankings(pd_rankings)
 
     table_source = ColumnDataSource(pd_show)
@@ -199,7 +199,8 @@ def create_rankings_html(pd_rankings):
     p = figure(x_range=pd_show_unsorted_pos[SUM_OF_SCORES].tolist(), plot_width=800, plot_height=400, title=SUM_OF_SCORES + " - lower is better")
     p.vbar(x='x', top='top', source=source, width=0.5, bottom=0, color="firebrick")
 
-    col_rankings = column([Div(text="Hint 1: click on the columns of scores for sorting.", style={"width": "500px", "margin-bottom": "10px"}),
+    col_rankings = column([Div(text="Hint 1: click on the columns of scores for sorting.", style={"width": "600px", "margin-bottom": "0px"}),
+                           Div(text="Taxonomic ranks scored: " + ", ".join(ranks_scored), style={"width": "600px", "margin-bottom": "0px"}),
                            data_table,
                            Div(text="Hint 2: slide the bars to change the weight of the metrics.", style={"width": "500px", "margin-top": "18px"}),
                            row(weight_recall, weight_precision),
@@ -482,8 +483,8 @@ def create_computing_efficiency_tab(pd_metrics, plots_list, tabs_list):
     tabs_list.append(Panel(child=column_time_memory, title="Computing efficiency"))
 
 
-def create_html(pd_rankings, pd_metrics, labels, sample_ids_list, plots_list, output_dir, desc_text):
-    col_rankings = create_rankings_html(pd_rankings)
+def create_html(pd_rankings, ranks_scored, pd_metrics, labels, sample_ids_list, plots_list, output_dir, desc_text):
+    col_rankings = create_rankings_html(pd_rankings, ranks_scored)
 
     create_heatmap_bar(output_dir)
 
@@ -491,7 +492,7 @@ def create_html(pd_rankings, pd_metrics, labels, sample_ids_list, plots_list, output_dir, desc_text):
     tabs_plots = create_plots_html(plots_list)
 
-    metrics_row = row(column(select_sample, select_rank, heatmap_legend_div, mytable1, sizing_mode='scale_width', css_classes=['bk-width-auto', 'bk-height-auto', 'bk-inline-block']), column(tabs_plots, sizing_mode='scale_width', css_classes=['bk-width-auto', 'bk-inline-block']), css_classes=['bk-width-auto', 'bk-inline-block'], sizing_mode='scale_width')
+    metrics_row = row(column(row(select_sample, select_rank, css_classes=['bk-width-auto', 'bk-combo-box']), heatmap_legend_div, mytable1, sizing_mode='scale_width', css_classes=['bk-width-auto', 'bk-height-auto', 'bk-inline-block']), column(tabs_plots, sizing_mode='scale_width', css_classes=['bk-width-auto', 'bk-inline-block']), css_classes=['bk-width-auto', 'bk-inline-block'], sizing_mode='scale_width')
 
     beta_div_column = create_beta_diversity_tab(labels, plots_list)
diff --git a/src/rankings.py b/src/rankings.py
index 7df385d..f9b6f6f 100644
--- a/src/rankings.py
+++ b/src/rankings.py
@@ -2,9 +2,23 @@
 from src.utils import constants as c
 import pandas as pd
+import logging
 
 
-def highscore_table(metrics, useranks=['phylum', 'class', 'order', 'family', 'genus']):
+def get_user_ranks_list(ranks):
+    rank_high_low = [x.strip() for x in ranks.split(',')]
+    if len(rank_high_low) != 2 or rank_high_low[0] not in c.ALL_RANKS or rank_high_low[1] not in c.ALL_RANKS:
+        logging.getLogger('opal').warning('Invalid ranks provided with option --ranks. Default will be used.')
+        return c.ALL_RANKS[:7]
+    index1 = c.ALL_RANKS.index(rank_high_low[0])
+    index2 = c.ALL_RANKS.index(rank_high_low[1])
+    if index1 < index2:
+        return c.ALL_RANKS[index1:index2 + 1]
+    else:
+        return c.ALL_RANKS[index2:index1 + 1]
+
+
+def highscore_table(metrics, ranks):
     """Compile a ranking table like Figure 3c of CAMI publication.
 
     Note that Figure 3c took into account mean scores for all samples of one of the three
@@ -18,7 +32,7 @@ def highscore_table(metrics, useranks=['phylum', 'class', 'order', 'family', 'genus']):
         Information about metrics of tool performance.
         Must contain columns: metric, rank, tool, value
     useranks : [str]
-        Default: 'phylum', 'class', 'order', 'family', 'genus'
+        Old default (CAMI 1): 'phylum', 'class', 'order', 'family', 'genus'
        Which ranks should be considered for rank dependent metrics.
        Here we decided to exclude e.g. species, because most profilers
        fail at that rank and we don't want to emphasize on this rank.
@@ -26,6 +40,11 @@ def highscore_table(metrics, useranks=['phylum', 'class', 'order', 'family', 'genus']):
     -------
     Pandas.DataFrame holding a high scoring table as in Figure 3c.
     """
+    if ranks:
+        useranks = get_user_ranks_list(ranks)
+    else:
+        useranks = c.ALL_RANKS[:7]
+
     pd_metrics = metrics.copy()
 
     pd_metrics.loc[pd_metrics[pd.isnull(pd_metrics['rank'])].index, 'rank'] = 'rank independent'
@@ -48,7 +67,7 @@ def highscore_table(metrics, useranks=['phylum', 'class', 'order', 'family', 'genus']):
         posresults.append(res)
     posresults = pd.concat(posresults)
 
-    return posresults.groupby(['metric', 'tool'])['position'].sum().to_frame()
+    return posresults.groupby(['metric', 'tool'])['position'].sum().to_frame(), useranks
 
     # reformat like Figure 3c
     os = []
diff --git a/version.py b/version.py
index cd7ca49..a6221b3 100644
--- a/version.py
+++ b/version.py
@@ -1 +1 @@
-__version__ = '1.0.1'
+__version__ = '1.0.2'
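
For reviewers: a minimal, runnable sketch (not part of the patch) of how the new --ranks option is expected to map to the list of ranks that get scored. It assumes c.ALL_RANKS is ordered from superkingdom down to strain, as listed in the option's help text; the helper name ranks_to_score is made up for illustration and simply mirrors the slicing logic added to src/rankings.py.

# Illustrative sketch only; not part of the patch.
# Assumption: the constant below matches the ordering of c.ALL_RANKS.
ALL_RANKS = ['superkingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species', 'strain']

def ranks_to_score(ranks):
    # Parse "highest,lowest" and return the inclusive range of ranks between them.
    rank_high_low = [x.strip() for x in ranks.split(',')]
    if len(rank_high_low) != 2 or rank_high_low[0] not in ALL_RANKS or rank_high_low[1] not in ALL_RANKS:
        return ALL_RANKS[:7]  # fall back to the default: superkingdom..species
    index1 = ALL_RANKS.index(rank_high_low[0])
    index2 = ALL_RANKS.index(rank_high_low[1])
    return ALL_RANKS[min(index1, index2):max(index1, index2) + 1]

print(ranks_to_score('phylum,genus'))    # ['phylum', 'class', 'order', 'family', 'genus']
print(ranks_to_score('genus,phylum'))    # same result; the two ranks may be given in either order
print(ranks_to_score('kingdom,genus'))   # invalid rank, falls back to superkingdom..species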