From 21cb6e5f3ff6194a077eff054f11d175008263b5 Mon Sep 17 00:00:00 2001 From: marc-vdm Date: Sun, 29 Sep 2024 17:53:35 +0200 Subject: [PATCH] Several CA fixes Add: - CA data: use actual total instead of sum(abs) - CA data: make distinction between positive and negative rest values (#886) - CA data: sort based on average of row instead of first column (#887) - CA table: consistent dropping of empty (rest) rows (#1044) - CA figure: add marker to show total score when both positive and negative results present (#647) --- activity_browser/bwutils/multilca.py | 37 ++++++++++++++++--- activity_browser/ui/figures.py | 25 ++++++++++++- .../ui/tables/models/lca_results.py | 6 +-- 3 files changed, 57 insertions(+), 11 deletions(-) diff --git a/activity_browser/bwutils/multilca.py b/activity_browser/bwutils/multilca.py index 759c3da9f..5637f8141 100644 --- a/activity_browser/bwutils/multilca.py +++ b/activity_browser/bwutils/multilca.py @@ -1,4 +1,5 @@ from collections import OrderedDict +from copy import deepcopy from typing import Iterable, Optional, Union from logging import getLogger @@ -433,15 +434,30 @@ def _build_dict( """ topcontribution_dict = dict() for fu_or_method, col in FU_M_index.items(): + + contribution_col = contributions[col, :] + total = contribution_col.sum() + top_contribution = ca.sort_array( - contributions[col, :], limit=limit, limit_type=limit_type + contribution_col, limit=limit, limit_type=limit_type, total=total + ) + + # split and calculate remaining rest sections for positive and negative part + pos_rest = ( + np.sum(contribution_col[contribution_col > 0]) + - np.sum(top_contribution[top_contribution[:, 0] > 0][:, 0]) + ) + neg_rest = ( + np.sum(contribution_col[contribution_col < 0]) + - np.sum(top_contribution[top_contribution[:, 0] < 0][:, 0]) ) + cont_per = OrderedDict() cont_per.update( { - ("Total", ""): contributions[col, :].sum(), - ("Rest", ""): contributions[col, :].sum() - - top_contribution[:, 0].sum(), + ("Total", ""): total, + 
("Rest (+)", ""): pos_rest, + ("Rest (-)", ""): neg_rest, } ) for value, index in top_contribution: @@ -583,7 +599,7 @@ def get_labelled_contribution_dict( # If the cont_dict has tuples for keys, coerce df.columns into MultiIndex if all(isinstance(k, tuple) for k in cont_dict.keys()): df.columns = pd.MultiIndex.from_tuples(df.columns) - special_keys = [("Total", ""), ("Rest", "")] + special_keys = [("Total", ""), ("Rest (+)", ""), ("Rest (-)", "")] # replace all 0 values with NaN and drop all rows with only NaNs # EXCEPT for the special keys @@ -596,6 +612,17 @@ def get_labelled_contribution_dict( ) df = df.loc[index] + # sort on absolute mean of a row + df_bot = deepcopy(df.iloc[3:, :]) + + func = lambda row: np.nanmean(np.abs(row)) + + df_bot["_sort_me_"] = (df_bot.select_dtypes(include=np.number)).apply(func, axis=1) + df_bot.sort_values(by="_sort_me_", ascending=False, inplace=True) + del df_bot["_sort_me_"] + + df = pd.concat([df.iloc[:3, :], df_bot], axis=0) + if not mask: joined = self.join_df_with_metadata( df, x_fields=x_fields, y_fields=y_fields, special_keys=special_keys diff --git a/activity_browser/ui/figures.py b/activity_browser/ui/figures.py index 4d65b9bf0..bf8309bfb 100644 --- a/activity_browser/ui/figures.py +++ b/activity_browser/ui/figures.py @@ -182,7 +182,8 @@ def __init__(self): self.plot_name = "Contributions" def plot(self, df: pd.DataFrame, unit: str = None): - """Plot a horizontal bar chart of the process contributions.""" + """Plot a horizontal stacked bar chart of contributions, + add 'total' marker if both positive and negative results are present.""" dfp = df.copy() dfp.index = dfp["index"] dfp.drop( @@ -190,6 +191,8 @@ def plot(self, df: pd.DataFrame, unit: str = None): ) # get rid of all non-numeric columns (metadata) if "Total" in dfp.index: dfp.drop("Total", inplace=True) + # drop rows if all values are 0 + dfp = dfp.loc[~(dfp == 0).all(axis=1)] self.ax.clear() canvas_width_inches, canvas_height_inches = 
self.get_canvas_size_in_inches() @@ -204,9 +207,18 @@ def plot(self, df: pd.DataFrame, unit: str = None): dfp.index = dfp.index.str.strip("_ \n\t") dfp.columns = dfp.columns.str.strip("_ \n\t") + # set colormap to use + items = dfp.shape[0] # how many contribution items + # skip grey and black at start/end of cmap + cmap = plt.cm.nipy_spectral_r(np.linspace(0, 1, items + 2))[1:-1] + colors = {item: color for item, color in zip(dfp.index, cmap)} + # overwrite rest values to grey + colors["Rest (+)"] = [0.8, 0.8, 0.8, 1.] + colors["Rest (-)"] = [0.8, 0.8, 0.8, 1.] + dfp.T.plot.barh( stacked=True, - cmap=plt.cm.nipy_spectral_r, + color=colors, ax=self.ax, legend=False if dfp.shape[0] >= self.MAX_LEGEND else True, ) @@ -225,6 +237,15 @@ def plot(self, df: pd.DataFrame, unit: str = None): self.ax.grid(which="major", axis="x", color="grey", linestyle="dashed") self.ax.set_axisbelow(True) # puts gridlines behind bars + # total marker when both negative and positive results are present + if "Rest (+)" in dfp.index and "Rest (-)" in dfp.index: + marker_size = max(min(150 / dfp.shape[1], 35), 10) # set marker size dynamic between 10 - 35 + for i, col in enumerate(dfp): + total = np.sum(dfp[col]) + self.ax.plot(total, i, + markersize=marker_size, marker="d", fillstyle="left", + markerfacecolor="black", markerfacecoloralt="grey", markeredgecolor="white") + # TODO review: remove or enable # refresh canvas diff --git a/activity_browser/ui/tables/models/lca_results.py b/activity_browser/ui/tables/models/lca_results.py index 5b4eab035..93267c831 100644 --- a/activity_browser/ui/tables/models/lca_results.py +++ b/activity_browser/ui/tables/models/lca_results.py @@ -28,8 +28,6 @@ def sync(self, df): class ContributionModel(PandasModel): def sync(self, df): - self._dataframe = df.replace(np.nan, "", regex=True) - # drop the 'rest' row if empty - if self._dataframe.select_dtypes(include=np.number).iloc[1, :].sum() == 0: - self._dataframe.drop(labels=1, inplace=True) + # drop any rows 
where all numbers are 0 + self._dataframe = df.loc[~(df.select_dtypes(include=np.number) == 0).all(axis=1)] self.updated.emit()