diff --git a/activity_browser/bwutils/multilca.py b/activity_browser/bwutils/multilca.py index 759c3da9f..83adb66d1 100644 --- a/activity_browser/bwutils/multilca.py +++ b/activity_browser/bwutils/multilca.py @@ -1,4 +1,5 @@ from collections import OrderedDict +from copy import deepcopy from typing import Iterable, Optional, Union from logging import getLogger @@ -433,15 +434,30 @@ def _build_dict( """ topcontribution_dict = dict() for fu_or_method, col in FU_M_index.items(): + + contribution_col = contributions[col, :] + total = contribution_col.sum() + top_contribution = ca.sort_array( - contributions[col, :], limit=limit, limit_type=limit_type + contribution_col, limit=limit, limit_type=limit_type, total=total + ) + + # split and calculate remaining rest sections for positive and negative part + pos_rest = ( + np.sum(contribution_col[contribution_col > 0]) + - np.sum(top_contribution[top_contribution[:, 0] > 0][:, 0]) + ) + neg_rest = ( + np.sum(contribution_col[contribution_col < 0]) + - np.sum(top_contribution[top_contribution[:, 0] < 0][:, 0]) ) + cont_per = OrderedDict() cont_per.update( { - ("Total", ""): contributions[col, :].sum(), - ("Rest", ""): contributions[col, :].sum() - - top_contribution[:, 0].sum(), + ("Total", ""): total, + ("Rest (+)", ""): pos_rest, + ("Rest (-)", ""): neg_rest, } ) for value, index in top_contribution: @@ -544,12 +560,12 @@ def join_df_with_metadata( if special_keys: # replace index keys with labels - try: # first put Total and Rest to the first two positions in the dataframe + try: # first put Total, Rest (+) and Rest (-) to the first three positions in the dataframe complete_index = special_keys + keys joined = joined.reindex(complete_index, axis="index", fill_value=0.0) except: log.error( - "Could not put Total and Rest on positions 0 and 1 in the dataframe." + "Could not put 'Total', 'Rest (+)' and 'Rest (-)' on positions 0, 1 and 2 in the dataframe." 
) joined.index = cls.get_labels(joined.index, fields=x_fields) return joined @@ -583,7 +599,7 @@ def get_labelled_contribution_dict( # If the cont_dict has tuples for keys, coerce df.columns into MultiIndex if all(isinstance(k, tuple) for k in cont_dict.keys()): df.columns = pd.MultiIndex.from_tuples(df.columns) - special_keys = [("Total", ""), ("Rest", "")] + special_keys = [("Total", ""), ("Rest (+)", ""), ("Rest (-)", "")] # replace all 0 values with NaN and drop all rows with only NaNs # EXCEPT for the special keys @@ -596,6 +612,17 @@ def get_labelled_contribution_dict( ) df = df.loc[index] + # sort on absolute mean of a row + df_bot = deepcopy(df.iloc[3:, :]) + + func = lambda row: np.nanmean(np.abs(row)) + + df_bot["_sort_me_"] = (df_bot.select_dtypes(include=np.number)).apply(func, axis=1) + df_bot.sort_values(by="_sort_me_", ascending=False, inplace=True) + del df_bot["_sort_me_"] + + df = pd.concat([df.iloc[:3, :], df_bot], axis=0) + if not mask: joined = self.join_df_with_metadata( df, x_fields=x_fields, y_fields=y_fields, special_keys=special_keys @@ -617,7 +644,7 @@ def adjust_table_unit(df: pd.DataFrame, method: Optional[tuple]) -> pd.DataFrame """Given a dataframe, adjust the unit of the table to either match the given method, or not exist.""" if "unit" not in df.columns: return df - keys = df.index[~df["index"].isin({"Total", "Rest"})] + keys = df.index[~df["index"].isin({"Total", "Rest (+)", "Rest (-)"})] unit = bd.Method(method).metadata.get("unit") if method else "unit" df.loc[keys, "unit"] = unit return df diff --git a/activity_browser/docs/wiki/LCA-Results.md b/activity_browser/docs/wiki/LCA-Results.md index ebbfce3d4..f713bdc64 100644 --- a/activity_browser/docs/wiki/LCA-Results.md +++ b/activity_browser/docs/wiki/LCA-Results.md @@ -69,7 +69,7 @@ we call the _from_ part of the contributions (the EFs or activities above) _enti There are several ways Activity Browser manipulates your results by default. 
- The results are **sorted** so that the row with the largest (absolute) average values are shown first. - A `cut-off` of 5% is applied, this only shows results that contribute at least 5% to the total result, - all other entities are grouped into a `Rest` group. + all other entities are grouped into a `Rest (+)` or `Rest (-)` group. - The contributions are _normalized_ to the impact of that reference flow, meaning they are show as a percentage, counting up to 100% for every item you compare. @@ -84,7 +84,8 @@ The `Relative` mode shows contributions _from_ entities of _x_% or higher. The `Top #` mode shows contributions from the _x_ entities that contribute the most (as absolute). You can adjust the `Cut-off level` to change how many results you see. -All results that don't make the cut-off will be grouped into the `Rest` group. +All results that don't make the cut-off will be grouped into the `Rest (+)` and `Rest (-)` groups. +A Rest group is only present when there are positive (for `Rest (+)`) or negative (for `Rest (-)`) numbers remaining for that group. #### Compare The `Compare` menu allows you to compare different dimensions of results. @@ -107,7 +108,28 @@ You can disable one of them if you want to focus on one of them. #### Relative and Absolute Finally, you can choose between `Relative` and `Absolute` results. -The `Relative` results will sum to 100%, the `Absolute` results will sum to the impact score. +The `Relative` results will sum to 100% (the total score); the `Absolute` results will sum to the impact score. + +### Positive and negative numbers in contribution results +It can happen in LCA that you get both positive and negative numbers in your contribution results. +Possible reasons include negative characterization factors, flows with negative numbers, or the use of substitution flows. 
+ +When there are both positive and negative numbers in the result, Activity Browser will show a marker to indicate +where the total score is, and show positive and negative contributions to the impact separately. + +Below is a simple example (with unrealistic values) to demonstrate this: + +![CA example with positive and negative results](./assets/ca_positive_negative_example.png) + +Other software (e.g. [Brightway2-Analyzer](https://github.com/brightway-lca/brightway2-analyzer)) +may use a different 'total', meaning the contributions may look different. +For example, Brightway2-Analyzer uses the total of absolute values +(so the range of numbers from the lowest negative number to the highest positive number) as 100% of the score. + +> [!IMPORTANT] +> Because Activity Browser uses the total score (the sum of all positive and negative contributions) as 100%, +> the positive contributions alone will sum to more than 100% when there are also negative contributions; together, positive and negative contributions still sum to 100%. +> Even a single contribution may be over 100%. ## Sankey The `Sankey` tab shows results from [graph traversal](https://docs.brightway.dev/projects/graphtools/en/latest/index.html). 
diff --git a/activity_browser/docs/wiki/assets/ca_positive_negative_example.png b/activity_browser/docs/wiki/assets/ca_positive_negative_example.png new file mode 100644 index 000000000..6dea1b229 Binary files /dev/null and b/activity_browser/docs/wiki/assets/ca_positive_negative_example.png differ diff --git a/activity_browser/ui/figures.py b/activity_browser/ui/figures.py index 4d65b9bf0..8941cf9bd 100644 --- a/activity_browser/ui/figures.py +++ b/activity_browser/ui/figures.py @@ -182,7 +182,8 @@ def __init__(self): self.plot_name = "Contributions" def plot(self, df: pd.DataFrame, unit: str = None): - """Plot a horizontal bar chart of the process contributions.""" + """Plot a horizontal stacked bar chart of contributions, + add 'total' marker if both positive and negative results are present.""" dfp = df.copy() dfp.index = dfp["index"] dfp.drop( @@ -190,6 +191,8 @@ def plot(self, df: pd.DataFrame, unit: str = None): ) # get rid of all non-numeric columns (metadata) if "Total" in dfp.index: dfp.drop("Total", inplace=True) + # drop rows if all values are 0 + dfp = dfp.loc[~(dfp == 0).all(axis=1)] self.ax.clear() canvas_width_inches, canvas_height_inches = self.get_canvas_size_in_inches() @@ -204,9 +207,18 @@ def plot(self, df: pd.DataFrame, unit: str = None): dfp.index = dfp.index.str.strip("_ \n\t") dfp.columns = dfp.columns.str.strip("_ \n\t") + # set colormap to use + items = dfp.shape[0] # how many contribution items + # skip grey and black at start/end of cmap + cmap = plt.cm.nipy_spectral_r(np.linspace(0, 1, items + 2))[1:-1] + colors = {item: color for item, color in zip(dfp.index, cmap)} + # overwrite rest values to grey + colors["Rest (+)"] = [0.8, 0.8, 0.8, 1.] + colors["Rest (-)"] = [0.8, 0.8, 0.8, 1.] 
+ dfp.T.plot.barh( stacked=True, - cmap=plt.cm.nipy_spectral_r, + color=colors, ax=self.ax, legend=False if dfp.shape[0] >= self.MAX_LEGEND else True, ) @@ -225,6 +237,16 @@ def plot(self, df: pd.DataFrame, unit: str = None): self.ax.grid(which="major", axis="x", color="grey", linestyle="dashed") self.ax.set_axisbelow(True) # puts gridlines behind bars + # total marker when both negative and positive results are present in a column + marker_size = max(min(150 / dfp.shape[1], 35), 10) # set marker size dyanmic between 10 - 35 + for i, col in enumerate(dfp): + total = np.sum(dfp[col]) + abs_total = np.sum(np.abs(dfp[col])) + if abs(total) != abs_total: + self.ax.plot(total, i, + markersize=marker_size, marker="d", fillstyle="left", + markerfacecolor="black", markerfacecoloralt="grey", markeredgecolor="white") + # TODO review: remove or enable # refresh canvas diff --git a/activity_browser/ui/tables/models/lca_results.py b/activity_browser/ui/tables/models/lca_results.py index 5b4eab035..93267c831 100644 --- a/activity_browser/ui/tables/models/lca_results.py +++ b/activity_browser/ui/tables/models/lca_results.py @@ -28,8 +28,6 @@ def sync(self, df): class ContributionModel(PandasModel): def sync(self, df): - self._dataframe = df.replace(np.nan, "", regex=True) - # drop the 'rest' row if empty - if self._dataframe.select_dtypes(include=np.number).iloc[1, :].sum() == 0: - self._dataframe.drop(labels=1, inplace=True) + # drop any rows where all numbers are 0 + self._dataframe = df.loc[~(df.select_dtypes(include=np.number) == 0).all(axis=1)] self.updated.emit()