From 21cb6e5f3ff6194a077eff054f11d175008263b5 Mon Sep 17 00:00:00 2001
From: marc-vdm <m.t.van.der.meide@cml.leidenuniv.nl>
Date: Sun, 29 Sep 2024 17:53:35 +0200
Subject: [PATCH] Several CA fixes

Add:
- CA data: use actual total instead of sum(abs)
- CA data: make distinction between positive and negative rest values (#886)
- CA data: sort based on average of row instead of first column (#887)
- CA table: consistent dropping of empty (rest) rows (#1044)
- CA figure: add marker to show total score when both positive and negative results present (#647)
---
 activity_browser/bwutils/multilca.py          | 37 ++++++++++++++++---
 activity_browser/ui/figures.py                | 25 ++++++++++++-
 .../ui/tables/models/lca_results.py           |  6 +--
 3 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/activity_browser/bwutils/multilca.py b/activity_browser/bwutils/multilca.py
index 759c3da9f..5637f8141 100644
--- a/activity_browser/bwutils/multilca.py
+++ b/activity_browser/bwutils/multilca.py
@@ -1,4 +1,5 @@
 from collections import OrderedDict
+from copy import deepcopy
 from typing import Iterable, Optional, Union
 from logging import getLogger
 
@@ -433,15 +434,30 @@ def _build_dict(
         """
         topcontribution_dict = dict()
         for fu_or_method, col in FU_M_index.items():
+
+            contribution_col = contributions[col, :]
+            total = contribution_col.sum()
+
             top_contribution = ca.sort_array(
-                contributions[col, :], limit=limit, limit_type=limit_type
+                contribution_col, limit=limit, limit_type=limit_type, total=total
+            )
+
+            # split and calculate remaining rest sections for positive and negative part
+            pos_rest = (
+                np.sum(contribution_col[contribution_col > 0])
+                - np.sum(top_contribution[top_contribution[:, 0] > 0][:, 0])
+            )
+            neg_rest = (
+                    np.sum(contribution_col[contribution_col < 0])
+                    - np.sum(top_contribution[top_contribution[:, 0] < 0][:, 0])
             )
+
             cont_per = OrderedDict()
             cont_per.update(
                 {
-                    ("Total", ""): contributions[col, :].sum(),
-                    ("Rest", ""): contributions[col, :].sum()
-                    - top_contribution[:, 0].sum(),
+                    ("Total", ""): total,
+                    ("Rest (+)", ""): pos_rest,
+                    ("Rest (-)", ""): neg_rest,
                 }
             )
             for value, index in top_contribution:
@@ -583,7 +599,7 @@ def get_labelled_contribution_dict(
         # If the cont_dict has tuples for keys, coerce df.columns into MultiIndex
         if all(isinstance(k, tuple) for k in cont_dict.keys()):
             df.columns = pd.MultiIndex.from_tuples(df.columns)
-        special_keys = [("Total", ""), ("Rest", "")]
+        special_keys = [("Total", ""), ("Rest (+)", ""), ("Rest (-)", "")]
 
         # replace all 0 values with NaN and drop all rows with only NaNs
         # EXCEPT for the special keys
@@ -596,6 +612,17 @@ def get_labelled_contribution_dict(
         )
         df = df.loc[index]
 
+        # sort rows (descending) on the mean of their absolute values, ignoring NaNs
+        df_bot = deepcopy(df.iloc[3:, :])
+
+        func = lambda row: np.nanmean(np.abs(row))
+
+        df_bot["_sort_me_"] = (df_bot.select_dtypes(include=np.number)).apply(func, axis=1)
+        df_bot.sort_values(by="_sort_me_", ascending=False, inplace=True)
+        del df_bot["_sort_me_"]
+
+        df = pd.concat([df.iloc[:3, :], df_bot], axis=0)
+
         if not mask:
             joined = self.join_df_with_metadata(
                 df, x_fields=x_fields, y_fields=y_fields, special_keys=special_keys
diff --git a/activity_browser/ui/figures.py b/activity_browser/ui/figures.py
index 4d65b9bf0..bf8309bfb 100644
--- a/activity_browser/ui/figures.py
+++ b/activity_browser/ui/figures.py
@@ -182,7 +182,8 @@ def __init__(self):
         self.plot_name = "Contributions"
 
     def plot(self, df: pd.DataFrame, unit: str = None):
-        """Plot a horizontal bar chart of the process contributions."""
+        """Plot a horizontal stacked bar chart of contributions,
+        add 'total' marker if both positive and negative results are present."""
         dfp = df.copy()
         dfp.index = dfp["index"]
         dfp.drop(
@@ -190,6 +191,8 @@ def plot(self, df: pd.DataFrame, unit: str = None):
         )  # get rid of all non-numeric columns (metadata)
         if "Total" in dfp.index:
             dfp.drop("Total", inplace=True)
+        # drop rows if all values are 0
+        dfp = dfp.loc[~(dfp == 0).all(axis=1)]
 
         self.ax.clear()
         canvas_width_inches, canvas_height_inches = self.get_canvas_size_in_inches()
@@ -204,9 +207,18 @@ def plot(self, df: pd.DataFrame, unit: str = None):
         dfp.index = dfp.index.str.strip("_ \n\t")
         dfp.columns = dfp.columns.str.strip("_ \n\t")
 
+        # set colormap to use
+        items = dfp.shape[0]  # how many contribution items
+        # skip grey and black at start/end of cmap
+        cmap = plt.cm.nipy_spectral_r(np.linspace(0, 1, items + 2))[1:-1]
+        colors = {item: color for item, color in zip(dfp.index, cmap)}
+        # overwrite rest values to grey
+        colors["Rest (+)"] = [0.8, 0.8, 0.8, 1.]
+        colors["Rest (-)"] = [0.8, 0.8, 0.8, 1.]
+
         dfp.T.plot.barh(
             stacked=True,
-            cmap=plt.cm.nipy_spectral_r,
+            color=colors,
             ax=self.ax,
             legend=False if dfp.shape[0] >= self.MAX_LEGEND else True,
         )
@@ -225,6 +237,15 @@ def plot(self, df: pd.DataFrame, unit: str = None):
         self.ax.grid(which="major", axis="x", color="grey", linestyle="dashed")
         self.ax.set_axisbelow(True)  # puts gridlines behind bars
 
+        # total marker when both negative and positive results are present
+        if "Rest (+)" in dfp.index and "Rest (-)" in dfp.index:
+            marker_size = max(min(150 / dfp.shape[1], 35), 10)  # scale marker size dynamically, clamped between 10 and 35
+            for i, col in enumerate(dfp):
+                total = np.sum(dfp[col])
+                self.ax.plot(total, i,
+                             markersize=marker_size, marker="d", fillstyle="left",
+                             markerfacecolor="black", markerfacecoloralt="grey", markeredgecolor="white")
+
         # TODO review: remove or enable
 
         # refresh canvas
diff --git a/activity_browser/ui/tables/models/lca_results.py b/activity_browser/ui/tables/models/lca_results.py
index 5b4eab035..93267c831 100644
--- a/activity_browser/ui/tables/models/lca_results.py
+++ b/activity_browser/ui/tables/models/lca_results.py
@@ -28,8 +28,6 @@ def sync(self, df):
 
 class ContributionModel(PandasModel):
     def sync(self, df):
-        self._dataframe = df.replace(np.nan, "", regex=True)
-        # drop the 'rest' row if empty
-        if self._dataframe.select_dtypes(include=np.number).iloc[1, :].sum() == 0:
-            self._dataframe.drop(labels=1, inplace=True)
+        # drop any rows where all numbers are 0
+        self._dataframe = df.loc[~(df.select_dtypes(include=np.number) == 0).all(axis=1)]
         self.updated.emit()