Commit

output metrics with and without filter
fernandomeyer committed Feb 16, 2021
1 parent 3ad737d commit e6addaa
Showing 4 changed files with 39 additions and 37 deletions.
28 changes: 13 additions & 15 deletions opal.py
@@ -70,11 +70,11 @@ def print_by_rank(output_dir, labels, pd_metrics):
order_rows = labels
# define ordering of columns, hard coded
order_columns = [c.UNIFRAC, c.UNW_UNIFRAC, c.L1NORM, c.RECALL, c.PRECISION, c.F1_SCORE, c.TP, c.FP, c.FN, c.OTUS, c.JACCARD, c.SHANNON_DIVERSITY, c.SHANNON_EQUIT, c.BRAY_CURTIS]
if c.FP_UNFILTERED in pd_metrics['metric'].values:
order_columns += [c.PRECISION_UNFILTERED, c.F1_SCORE_UNFILTERED, c.TP_UNFILTERED, c.FP_UNFILTERED]
if c.FP + c.UNFILTERED_SUF in pd_metrics['metric'].values:
order_columns += [metric + c.UNFILTERED_SUF for metric in order_columns]
for rank in c.ALL_RANKS:
# subset to those information that either belong to the given rank or are rank independent, i.e. are unifrac values
table = pd_metrics[(pd_metrics['rank'] == rank) | (pd_metrics['metric'].isin([c.UNIFRAC, c.UNW_UNIFRAC]))]
table = pd_metrics[(pd_metrics['rank'] == rank) | (pd_metrics['metric'].isin([c.UNIFRAC, c.UNW_UNIFRAC, c.UNIFRAC + c.UNFILTERED_SUF, c.UNW_UNIFRAC + c.UNFILTERED_SUF]))]
# reformat the table with a pivot_table
table = table.pivot_table(index=['tool', 'sample'], columns='metric', values='value')
# select only tools in labels and get rid of gold standard
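In the hunk above, the per-metric *_UNFILTERED constants are replaced by a single suffix check: if any suffixed metric (e.g. c.FP + c.UNFILTERED_SUF) appears in the frame, every column gets an unfiltered twin appended. A minimal sketch of that doubling, using literal stand-ins for the constants defined in src/utils/constants.py:

```python
# Sketch only: literal stand-ins for c.FP, c.PRECISION and c.UNFILTERED_SUF.
UNFILTERED_SUF = ' (unfiltered)'
FP, PRECISION = 'False positives', 'Purity'

order_columns = [PRECISION, FP]
metrics_present = {PRECISION, FP, PRECISION + UNFILTERED_SUF, FP + UNFILTERED_SUF}

# One suffixed metric in the frame signals that all metrics were also
# computed without the tail filter, so the column order is doubled.
if FP + UNFILTERED_SUF in metrics_present:
    order_columns += [metric + UNFILTERED_SUF for metric in order_columns]

print(order_columns)
# ['Purity', 'False positives', 'Purity (unfiltered)', 'False positives (unfiltered)']
```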
@@ -92,14 +92,16 @@ def print_by_tool(output_dir, pd_metrics):
make_sure_path_exists(os.path.join(output_dir, "by_tool"))
# define ordering of columns, hard coded
order_columns = [c.UNIFRAC, c.UNW_UNIFRAC, c.L1NORM, c.RECALL, c.PRECISION, c.F1_SCORE, c.TP, c.FP, c.FN, c.OTUS, c.JACCARD, c.SHANNON_DIVERSITY, c.SHANNON_EQUIT, c.BRAY_CURTIS]
if c.FP_UNFILTERED in pd_metrics['metric'].values:
order_columns += [c.PRECISION_UNFILTERED, c.F1_SCORE_UNFILTERED, c.TP_UNFILTERED, c.FP_UNFILTERED]
unifrac_list = [c.UNIFRAC, c.UNW_UNIFRAC]
if c.FP + c.UNFILTERED_SUF in pd_metrics['metric'].values:
order_columns += [metric + c.UNFILTERED_SUF for metric in order_columns]
unifrac_list += [c.UNIFRAC + c.UNFILTERED_SUF, c.UNW_UNIFRAC + c.UNFILTERED_SUF]
for toolname, pd_metrics_tool in pd_metrics.groupby('tool'):
if toolname == c.GS:
continue
table = pd_metrics_tool.pivot_table(index=['rank', 'sample'], columns='metric', values='value')
# little hack to carry unifrac over to every rank
for unifrac_col in order_columns[:2]:
for unifrac_col in unifrac_list:
table[unifrac_col] = pd_metrics_tool[pd_metrics_tool['metric'] == unifrac_col]['value'].values[0]
# order table
table['rank_cat'] = pd.Categorical(table.index.get_level_values('rank'), categories=c.ALL_RANKS, ordered=True)
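The loop above (the "little hack") copies the rank-independent UniFrac values, now both filtered and unfiltered, into every row of the per-tool table; assigning a single value to a DataFrame column broadcasts it to all rows. A small illustration with made-up numbers:

```python
import pandas as pd

# Per-tool table indexed by (rank, sample); made-up purity values.
table = pd.DataFrame(
    {'Purity': [0.9, 0.8]},
    index=pd.MultiIndex.from_tuples([('species', 'S1'), ('genus', 'S1')],
                                    names=['rank', 'sample']))

# UniFrac is computed once per tool, so a scalar assignment fills every rank row.
unifrac_value = 3.2  # hypothetical value taken from the tool's rank-independent row
table['Unweighted UniFrac error (unfiltered)'] = unifrac_value
print(table)
```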
@@ -167,7 +169,7 @@ def evaluate(gs_samples_list, profiles_list_to_samples_list, labels, normalize,
if filter_tail_percentage:
metrics_list = pd_metrics['metric'].unique().tolist()
pd_metrics_copy = pd_metrics.copy()
pd_metrics_copy['metric'].replace(metrics_list, [metric + ' (unfiltered)' for metric in metrics_list], inplace=True)
pd_metrics_copy['metric'].replace(metrics_list, [metric + c.UNFILTERED_SUF for metric in metrics_list], inplace=True)
pd_metrics = pd.concat([pd_metrics, pd_metrics_copy], ignore_index=True)

one_profile_assessed = False
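When a tail-filter percentage is given, the hunk above snapshots the still-unfiltered results: the metrics frame is copied, every metric name in the copy gets the shared suffix, and the copy is concatenated back, so both variants travel through the rest of the pipeline. A hedged sketch of that copy-rename-concat step with toy data:

```python
import pandas as pd

UNFILTERED_SUF = ' (unfiltered)'  # mirrors src/utils/constants.py

# Toy metrics frame standing in for pd_metrics before the filter is applied.
pd_metrics = pd.DataFrame({'metric': ['Purity', 'False positives'],
                           'value': [0.9, 4.0]})

metrics_list = pd_metrics['metric'].unique().tolist()
pd_metrics_copy = pd_metrics.copy()
# Rename every metric in the copy by appending the shared suffix.
pd_metrics_copy['metric'] = pd_metrics_copy['metric'].replace(
    metrics_list, [m + UNFILTERED_SUF for m in metrics_list])
pd_metrics = pd.concat([pd_metrics, pd_metrics_copy], ignore_index=True)

print(pd_metrics['metric'].tolist())
# ['Purity', 'False positives', 'Purity (unfiltered)', 'False positives (unfiltered)']
```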
@@ -250,13 +252,9 @@ def reformat_pandas(sample_id, label, braycurtis, shannon, binary_metrics, l1nor

# convert Binary metrics
pd_binary_metrics = pd.DataFrame([binary_metrics[rank].get_pretty_dict() for rank in binary_metrics.keys()]).set_index('rank').stack().reset_index().rename(columns={'level_1': 'metric', 0: 'value'})
if 'fpfiltered' in pd_binary_metrics['metric'].values:
oldnames = ['fp', 'fpfiltered', 'tp', 'tpfiltered', 'fn', 'jaccard', 'precision', 'precisionfiltered', 'recall', 'f1', 'f1filtered', 'otus']
newnames = [c.FP_UNFILTERED, c.FP, c.TP_UNFILTERED, c.TP, c.FN, c.JACCARD, c.PRECISION_UNFILTERED, c.PRECISION, c.RECALL, c.F1_SCORE_UNFILTERED, c.F1_SCORE, c.OTUS]
else:
oldnames = ['fp', 'tp', 'fn', 'jaccard', 'precision', 'recall', 'f1', 'otus']
newnames = [c.FP, c.TP, c.FN, c.JACCARD, c.PRECISION, c.RECALL, c.F1_SCORE, c.OTUS]
pd_binary_metrics['metric'].replace(oldnames, newnames, inplace=True)
pd_binary_metrics['metric'].replace(['fp', 'tp', 'fn', 'jaccard', 'precision', 'recall', 'f1', 'otus'],
[c.FP, c.TP, c.FN, c.JACCARD, c.PRECISION, c.RECALL, c.F1_SCORE, c.OTUS],
inplace=True)
pd_binary_metrics['sample'] = sample_id
pd_binary_metrics['tool'] = label

@@ -270,7 +268,7 @@ def reformat_pandas(sample_id, label, braycurtis, shannon, binary_metrics, l1nor

if rename_as_unfiltered:
metrics_list = pd_formatted['metric'].unique().tolist()
pd_formatted['metric'].replace(metrics_list, [metric + ' (unfiltered)' for metric in metrics_list], inplace=True)
pd_formatted['metric'].replace(metrics_list, [metric + c.UNFILTERED_SUF for metric in metrics_list], inplace=True)

return pd_formatted

6 changes: 3 additions & 3 deletions src/binary_metrics.py
@@ -170,7 +170,7 @@ def jaccard_index(tp, rank_query_taxids, rank_truth_taxids):

def f1_score(this_precision, this_recall):
""" Returns f1 score
>>> f1_score(rank_metrics.precision, rank_metrics.recall)
>>> f1_score(test_rank_metrics.precision, test_rank_metrics.recall)
1.0
"""
@@ -182,7 +182,7 @@ def f1_score(this_precision, this_recall):

def compute_rank_metrics(rank_query, rank_truth, rank):
""" Returns metrics for one rank
>>> compute_rank_metrics(test_query_rank, test_truth_rank, "species", None).get_ordered_dict()
>>> compute_rank_metrics(test_query_rank, test_truth_rank, "species").get_ordered_dict()
OrderedDict([('_RankMetrics__f1', 1.0), ('_RankMetrics__fn', 0), ('_RankMetrics__fp', 0), ('_RankMetrics__jaccard', 1.0), ('_RankMetrics__otus', 1), ('_RankMetrics__precision', 1.0), ('_RankMetrics__rank', 'species'), ('_RankMetrics__recall', 1.0), ('_RankMetrics__tp', 1)])
"""
@@ -207,7 +207,7 @@ def compute_rank_metrics(rank_query, rank_truth, rank):

def compute_tree_metrics(query, truth):
""" Return metrics for tree
>>> compute_tree_metrics(query_tree, truth_tree, None)["species"].get_ordered_dict()
>>> compute_tree_metrics(query_tree, truth_tree)["species"].get_ordered_dict()
OrderedDict([('_RankMetrics__f1', 0.5), ('_RankMetrics__fn', 1), ('_RankMetrics__fp', 3), ('_RankMetrics__jaccard', 0.3333333333333333), ('_RankMetrics__otus', 5), ('_RankMetrics__precision', 0.4), ('_RankMetrics__rank', 'species'), ('_RankMetrics__recall', 0.6666666666666666), ('_RankMetrics__tp', 2)])
"""

37 changes: 22 additions & 15 deletions src/html_opal.py
@@ -105,6 +105,10 @@ def get_rank_to_sample_pd(pd_metrics):
for index, row in pd_grouped_copy.iterrows():
pd_grouped.loc[index][c.UNIFRAC] = pd_grouped.loc[('rank independent', index[1], index[2])][c.UNIFRAC]
pd_grouped.loc[index][c.UNW_UNIFRAC] = pd_grouped.loc[('rank independent', index[1], index[2])][c.UNW_UNIFRAC]
if c.UNIFRAC + c.UNFILTERED_SUF in pd_grouped.columns:
for index, row in pd_grouped_copy.iterrows():
pd_grouped.loc[index][c.UNIFRAC + c.UNFILTERED_SUF] = pd_grouped.loc[('rank independent', index[1], index[2])][c.UNIFRAC + c.UNFILTERED_SUF]
pd_grouped.loc[index][c.UNW_UNIFRAC + c.UNFILTERED_SUF] = pd_grouped.loc[('rank independent', index[1], index[2])][c.UNW_UNIFRAC + c.UNFILTERED_SUF]

for (rank, sample), g in pd_grouped.groupby(['rank', 'sample']):
rank_to_sample_pd[rank][sample] = g.reset_index().rename(columns={'tool': 'Tool'}).drop(['rank', 'sample'], axis=1).set_index('Tool').T
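The added block above repeats the existing UniFrac carry-over for the unfiltered columns: the value stored under the 'rank independent' pseudo-rank is copied into every (rank, tool, sample) row. A small sketch of that MultiIndex lookup, written with .loc[row, column] and made-up values:

```python
import pandas as pd

col = 'Unweighted UniFrac error (unfiltered)'  # c.UNW_UNIFRAC + c.UNFILTERED_SUF
idx = pd.MultiIndex.from_tuples(
    [('rank independent', 'toolA', 'S1'),
     ('species', 'toolA', 'S1'),
     ('genus', 'toolA', 'S1')],
    names=['rank', 'tool', 'sample'])
pd_grouped = pd.DataFrame({col: [3.2, float('nan'), float('nan')]}, index=idx)

# Copy the rank-independent value into every row of the same tool and sample.
for index in pd_grouped.index:
    source = ('rank independent', index[1], index[2])
    pd_grouped.loc[index, col] = pd_grouped.loc[source, col]

print(pd_grouped)
```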
@@ -205,27 +209,27 @@ def get_colors_and_ranges(name, all_values, df_metrics):
hue2 = 240

metrics = [c.PRECISION, c.RECALL, c.F1_SCORE, c.JACCARD]
metrics = metrics + [metric + ' (unfiltered)' for metric in metrics]
metrics = metrics + [metric + c.UNFILTERED_SUF for metric in metrics]
if name in metrics:
return color1, color2, hue1, hue2, 0, 1

metrics = [c.FP, c.UNIFRAC, c.UNW_UNIFRAC]
metrics = metrics + [metric + ' (unfiltered)' for metric in metrics]
metrics = metrics + [metric + c.UNFILTERED_SUF for metric in metrics]
if name in metrics:
return color2, color1, hue2, hue1, 0, max(all_values)

if name == c.TP or name == c.TP + ' (unfiltered)':
if name == c.TP or name == c.TP + c.UNFILTERED_SUF:
return color1, color2, hue1, hue2, 0, max(all_values)

if name == c.FN or name == c.FN + ' (unfiltered)':
if name == c.FN or name == c.FN + c.UNFILTERED_SUF:
fn_values = df_metrics.loc[name, ].values
# convert "<mean> (<standard error>)" to float of <mean>
if len(fn_values) > 0 and isinstance(fn_values[0], str):
fn_values = [float(x.split(' ')[0]) for x in fn_values]
return color2, color1, hue2, hue1, 0, max(fn_values)
if name == c.L1NORM or name == c.L1NORM + ' (unfiltered)':
if name == c.L1NORM or name == c.L1NORM + c.UNFILTERED_SUF:
return color2, color1, hue2, hue1, 0, 2
if name == c.BRAY_CURTIS or name == c.BRAY_CURTIS + ' (unfiltered)':
if name == c.BRAY_CURTIS or name == c.BRAY_CURTIS + c.UNFILTERED_SUF:
return color2, color1, hue2, hue1, 0, 1
return color1, color2, hue1, hue2, max(all_values), min(all_values)

@@ -292,13 +296,21 @@ def create_metrics_table(pd_metrics, labels, sample_ids_list):
all_sample_ids = sample_ids_list[:]
all_sample_ids.insert(0, '(average over samples)')

if c.FP_UNFILTERED in pd_metrics['metric'].values:
presence_metrics = [c.RECALL, c.PRECISION, c.PRECISION_UNFILTERED, c.F1_SCORE, c.F1_SCORE_UNFILTERED, c.TP, c.TP_UNFILTERED, c.FP, c.FP_UNFILTERED, c.FN, c.JACCARD]
else:
presence_metrics = [c.RECALL, c.PRECISION, c.F1_SCORE, c.TP, c.FP, c.FN, c.JACCARD]
presence_metrics = [c.RECALL, c.PRECISION, c.F1_SCORE, c.TP, c.FP, c.FN, c.JACCARD]
estimates_metrics = [c.UNIFRAC, c.UNW_UNIFRAC, c.L1NORM, c.BRAY_CURTIS]
alpha_diversity_metics = [c.OTUS, c.SHANNON_DIVERSITY, c.SHANNON_EQUIT]
rank_independent_metrics = [c.UNIFRAC, c.UNW_UNIFRAC]

if c.FP + c.UNFILTERED_SUF in pd_metrics['metric'].values:
presence_metrics = [[metric, metric + c.UNFILTERED_SUF] for metric in presence_metrics]
presence_metrics = [metric for elem in presence_metrics for metric in elem]
estimates_metrics = [[metric, metric + c.UNFILTERED_SUF] for metric in estimates_metrics]
estimates_metrics = [metric for elem in estimates_metrics for metric in elem]
alpha_diversity_metics = [[metric, metric + c.UNFILTERED_SUF] for metric in alpha_diversity_metics]
alpha_diversity_metics = [metric for elem in alpha_diversity_metics for metric in elem]
rank_independent_metrics = [[metric, metric + c.UNFILTERED_SUF] for metric in rank_independent_metrics]
rank_independent_metrics = [metric for elem in rank_independent_metrics for metric in elem]

all_metrics = [presence_metrics, estimates_metrics, alpha_diversity_metics]

presence_metrics_label = 'Presence/absence of taxa'
Expand Down Expand Up @@ -333,11 +345,6 @@ def get_html_dict(metrics):
(c.OTUS, c.TOOLTIP_OTUS),
(c.SHANNON_DIVERSITY, c.TOOLTIP_SHANNON_DIVERSITY),
(c.SHANNON_EQUIT, c.TOOLTIP_SHANNON_EQUIT)]
if c.FP_UNFILTERED in pd_metrics['metric'].values:
metrics_tuples += [(c.FP_UNFILTERED, c.TOOLTIP_FP),
(c.TP_UNFILTERED, c.TOOLTIP_TP),
(c.PRECISION_UNFILTERED, c.TOOLTIP_PRECISION),
(c.F1_SCORE_UNFILTERED, c.TOOLTIP_F1_SCORE)]

d = get_html_dict(metrics_tuples)

5 changes: 1 addition & 4 deletions src/utils/constants.py
@@ -14,20 +14,17 @@
UNW_UNIFRAC = 'Unweighted UniFrac error'
L1NORM = 'L1 norm error'
PRECISION = 'Purity'
PRECISION_UNFILTERED = 'Purity (unfiltered)'
RECALL = 'Completeness'
F1_SCORE = 'F1 score'
F1_SCORE_UNFILTERED = 'F1 score (unfiltered)'
TP = 'True positives'
TP_UNFILTERED = 'True positives (unfiltered)'
FP = 'False positives'
FP_UNFILTERED = 'False positives (unfiltered)'
FN = "False negatives"
OTUS = "Taxon counts"
JACCARD = "Jaccard index"
SHANNON_DIVERSITY = 'Shannon diversity'
SHANNON_EQUIT = 'Shannon equitability'
BRAY_CURTIS = 'Bray-Curtis distance'
UNFILTERED_SUF = ' (unfiltered)'
ALL_METRICS = [UNIFRAC, UNW_UNIFRAC, L1NORM, PRECISION, RECALL, F1_SCORE, TP, FP, FN, JACCARD, SHANNON_DIVERSITY, SHANNON_EQUIT, BRAY_CURTIS]

TOOLTIP_UNIFRAC = 'tree-based measure of similarity between the true and predicted abundances at all taxonomic ranks ranging from 0 (high similarity) to 16 (low similarity).'
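With the per-metric *_UNFILTERED constants removed, unfiltered display names are composed on demand from the shared suffix, e.g.:

```python
UNFILTERED_SUF = ' (unfiltered)'
FP = 'False positives'

print(FP + UNFILTERED_SUF)  # 'False positives (unfiltered)'
```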
