Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First-Tier contribution analysis tab #1046

Draft
wants to merge 15 commits into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
330 changes: 324 additions & 6 deletions activity_browser/layouts/tabs/LCA_results_tabs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,26 @@
"""

from collections import namedtuple
from typing import List, Optional, Union
from copy import deepcopy
from typing import List, Tuple, Optional, Union
from logging import getLogger

import numpy as np
import pandas as pd
import warnings

from PySide2 import QtCore, QtGui
from PySide2.QtWidgets import (QApplication, QButtonGroup, QCheckBox,
QComboBox, QFileDialog, QGridLayout, QGroupBox,
QHBoxLayout, QLabel, QLineEdit, QMessageBox,
QPushButton, QRadioButton, QScrollArea,
QTableView, QTabWidget, QToolBar, QVBoxLayout,
QWidget)
from activity_browser.bwutils import AB_metadata

from activity_browser.bwutils.metadata import MetaDataStore
from stats_arrays.errors import InvalidParamsError
import brightway2 as bw

from activity_browser import signals
from activity_browser.mod.bw2data import calculation_setups
Expand All @@ -31,6 +39,7 @@
from ...ui.tables import ContributionTable, InventoryTable, LCAResultsTable
from ...ui.web import SankeyNavigatorWidget
from ...ui.widgets import CutoffMenu, SwitchComboBox
from ...bwutils.superstructure.graph_traversal_with_scenario import GraphTraversalWithScenario
from .base import BaseRightTab

log = getLogger(__name__)
Expand Down Expand Up @@ -62,7 +71,7 @@ def get_unit(method: tuple, relative: bool = False) -> str:

# Special namedtuple for the LCAResults TabWidget.
# Special namedtuples describing the fixed sub-tab layout of the LCA results
# TabWidget and its auxiliary widget groups.
Tabs = namedtuple(
    "tabs", ("inventory", "results", "ef", "process", "product", "sankey", "mc", "gsa")
)
# Pair of toggles for relative vs. absolute result display.
Relativity = namedtuple("relativity", ("relative", "absolute"))
# Export widget group: a label plus copy/CSV/Excel actions.
ExportTable = namedtuple("export_table", ("label", "copy", "csv", "excel"))
Expand Down Expand Up @@ -121,6 +130,7 @@ def __init__(self, data: dict, parent=None):
results=LCAResultsTab(self),
ef=ElementaryFlowContributionTab(self),
process=ProcessContributionsTab(self),
product=ProductContributionsTab(self.cs_name, self),
sankey=SankeyNavigatorWidget(self.cs_name, parent=self),
mc=MonteCarloTab(
self
Expand All @@ -132,6 +142,7 @@ def __init__(self, data: dict, parent=None):
results="LCA Results",
ef="EF Contributions",
process="Process Contributions",
product="Product Contributions",
sankey="Sankey",
mc="Monte Carlo",
gsa="Sensitivity Analysis",
Expand Down Expand Up @@ -909,6 +920,7 @@ def set_filename(self, optional_fields: dict = None):
optional.get("functional_unit"),
self.unit,
)

filename = "_".join((str(x) for x in fields if x is not None))
self.plot.plot_name, self.table.table_name = filename, filename

Expand Down Expand Up @@ -981,12 +993,9 @@ def set_combobox_changes(self):
# gather the combobox values
method = self.parent.method_dict[self.combobox_menu.method.currentText()]
functional_unit = self.combobox_menu.func.currentText()
scenario = self.combobox_menu.scenario.currentIndex()
scenario = max(self.combobox_menu.scenario.currentIndex(), 0) # set scenario 0 if not initiated yet
aggregator = self.combobox_menu.agg.currentText()

# catch uninitiated scenario combobox
if scenario < 0:
scenario = 0
# set aggregator to None if unwanted
if aggregator == "none":
aggregator = None
Expand Down Expand Up @@ -1159,6 +1168,315 @@ def update_dataframe(self, *args, **kwargs):
)


class ProductContributionsTab(ContributionTab):
    """Class for the 'Product Contributions' sub-tab.

    This tab allows for analysis of first-level product contributions.
    The direct impact (from biosphere exchanges from the FU)
    and cumulative impacts from all exchange inputs to the FU (first level) are calculated.

    e.g. the direct emissions from steel production and the cumulative impact for all electricity input
    into that activity. This works on the basis of input products and their total (cumulative) impact, scaled to
    how much of that product is needed in the FU.

    Example questions that can be answered by this tab:
        What is the contribution of electricity (product) to reference flow XXX?
        Which input product contributes the most to impact category YYY?
        What products contribute most to reference flow ZZZ?

    Shows:
        Compare options button to change between 'Reference Flows' and 'Impact Categories'
        'Impact Category'/'Reference Flow' chooser with aggregation method
        Plot/Table on/off and Relative/Absolute options for data
        Plot/Table
        Export options
    """

    def __init__(self, cs_name, parent=None):
        super().__init__(parent)

        # We cache the individual calculation results, as they are re-used in multiple views
        # e.g. FU1 x method1 x scenario1
        # may be seen in both 'Reference Flows' and 'Impact Categories', just with different axes.
        # The "totals" entry maps column name -> total score; it is kept not for calculation
        # speed, but to be able to easily convert results to relative values later.
        self.cache = {"totals": {}}
        self.caching = True  # set to False to disable caching for debug

        self.layout.addLayout(get_header_layout("Product Contributions"))
        self.layout.addWidget(self.cutoff_menu)
        self.layout.addWidget(horizontal_line())
        combobox = self.build_combobox(has_method=True, has_func=True)
        self.layout.addLayout(combobox)
        self.layout.addWidget(horizontal_line())
        self.layout.addWidget(self.build_main_space())
        self.layout.addLayout(self.build_export(True, True))

        # get relevant data from calculation setup
        self.cs = cs_name
        func_units = bw.calculation_setups[self.cs]["inv"]
        self.func_keys = [list(fu.keys())[0] for fu in func_units]  # extract a list of keys from the functional units
        self.func_units = [
            {bw.get_activity(k): v for k, v in fu.items()}
            for fu in func_units
        ]
        self.methods = bw.calculation_setups[self.cs]["ia"]

        self.contribution_fn = "Product contributions"
        self.switches.configure(self.has_func, self.has_method)
        self.connect_signals()
        self.toggle_comparisons(self.switches.indexes.func)

    def update_dataframe(self, *args, **kwargs):
        """Retrieve the product contributions and return them as a dataframe.

        Depending on the current compare mode, results are gathered per
        reference flow, per impact category, or per scenario; cached results
        are reused when available.
        """

        # TODO
        # 1 refactor so this updates the df, not does calculations/cache reads etc
        # 2 figure out how this already works with relative???
        # 3 make this work with aggregator for data
        # 4 make this work with cutoff menu (limit, limit_type (and normalize??)
        # 5 update documentation

        def get_data():
            # Closure: reads the current demand/method/scenario selection from
            # the enclosing scope at call time.
            return self.calculate_contributions(demand, demand_key, demand_index,
                                                method=method, method_index=method_index,
                                                scenario_lca=self.has_scenarios, scenario_index=scenario_index,
                                                )

        # get the right data
        if self.has_scenarios:
            # get the scenario index, if it is -1 (none selected), then use index 0
            scenario_index = max(self.combobox_menu.scenario.currentIndex(), 0)
        else:
            scenario_index = None
        method_index = self.combobox_menu.method.currentIndex()
        method = self.methods[method_index]
        demand_index = self.combobox_menu.func.currentIndex()
        demand = self.func_units[demand_index]
        demand_key = self.func_keys[demand_index]

        all_data = []
        compare = self.switches.currentText()
        if compare == "Reference Flows":
            # run the analysis for every reference flow
            for demand_index, demand in enumerate(self.func_units):
                demand_key = self.func_keys[demand_index]
                cache_key = (demand_index, method_index, scenario_index)
                if self.caching and self.cache.get(cache_key, False):
                    # this data is cached
                    all_data.append([demand_key, self.cache[cache_key]])
                    continue

                data = get_data()
                all_data.append([demand_key, data])
                if self.caching:
                    self.cache[cache_key] = data
        elif compare == "Impact Categories":
            # run the analysis for every method
            for method_index, method in enumerate(self.methods):
                cache_key = (demand_index, method_index, scenario_index)
                if self.caching and self.cache.get(cache_key, False):
                    # this data is cached
                    all_data.append([method, self.cache[cache_key]])
                    continue

                data = get_data()
                all_data.append([method, data])
                if self.caching:
                    self.cache[cache_key] = data
        elif compare == "Scenarios":
            # run the analysis for every scenario
            orig_idx = self.combobox_menu.scenario.currentIndex()
            for scenario_index in range(self.combobox_menu.scenario.count()):
                scenario = self.combobox_menu.scenario.itemText(scenario_index)
                cache_key = (demand_index, method_index, scenario_index)
                if self.caching and self.cache.get(cache_key, False):
                    # this data is cached
                    all_data.append([scenario, self.cache[cache_key]])
                    continue

                data = get_data()
                all_data.append([scenario, data])
                if self.caching:
                    self.cache[cache_key] = data
            self.combobox_menu.scenario.setCurrentIndex(orig_idx)

        df = self.data_to_df(all_data, compare)
        return df

    def calculate_contributions(self, demand, demand_key, demand_index,
                                method, method_index: int = None,
                                scenario_lca: bool = False, scenario_index: int = None) -> Optional[dict]:
        """Calculate first-level product contributions for one FU/method(/scenario).

        Returns a dict mapping activity keys to their (scaled, cumulative)
        scores, plus a "Total" entry with the overall score and a `demand_key`
        entry holding the remainder (the direct contribution of the FU itself).
        """

        def get_default_demands() -> dict:
            """Get the inputs to calculate contributions from the activity"""
            # get exchange keys leading to this activity
            technosphere = bw.get_activity(demand_key).technosphere()

            keys = [exch.input.key for exch in technosphere if
                    exch.input.key != exch.output.key]
            # find scale from production amount and demand amount
            scale = demand[demand_key] / [p for p in bw.get_activity(demand_key).production()][0].amount

            amounts = [exch.amount * scale for exch in technosphere if
                       exch.input.key != exch.output.key]
            demands = {keys[i]: amounts[i] for i, _ in enumerate(keys)}
            return demands

        def get_scenario_demands() -> dict:
            """Get the inputs to calculate contributions from the scenario matrix"""
            # get exchange keys leading to this activity
            technosphere = bw.get_activity(demand_key).technosphere()
            demand_idx = _lca.product_dict[demand_key]

            keys = [exch.input.key for exch in technosphere if
                    exch.input.key != exch.output.key]
            # find scale from production amount and demand amount
            # (technosphere diagonal entries are negative, hence the * -1)
            scale = demand[demand_key] / _lca.technosphere_matrix[_lca.activity_dict[demand_key], demand_idx] * -1

            amounts = []

            for exch in technosphere:
                exch_idx = _lca.activity_dict[exch.input.key]
                if exch.input.key != exch.output.key:
                    amounts.append(_lca.technosphere_matrix[exch_idx, demand_idx] * scale)

            # write all non-zero exchanges to demand dict
            demands = {keys[i]: amounts[i] for i, _ in enumerate(keys) if amounts[i] != 0}
            return demands

        # reuse LCA object from original calculation to skip 1 LCA
        if scenario_lca:
            # get score from the already calculated result
            score = self.parent.mlca.lca_scores[demand_index, method_index, scenario_index]

            # get lca object from mlca class
            self.parent.mlca.current = scenario_index
            self.parent.mlca.update_matrices()
            _lca = self.parent.mlca.lca
            _lca.redo_lci(demand)

            # _lca.lci(factorize=True)
        else:
            # get score from the already calculated result
            score = self.parent.mlca.lca_scores[demand_index, method_index]

            # get lca object to calculate new results
            _lca = self.parent.mlca.lca

        # set the correct method
        _lca.switch_method(method)
        _lca.lcia_calculation()

        if score == 0:
            # no need to calculate contributions to '0' score
            # technically it could be that positive and negative score of same amount negate to 0, but highly unlikely.
            return {"Total": 0, demand_key: 0}

        data = {"Total": score}
        remainder = score  # contribution of demand_key

        if not scenario_lca:
            new_demands = get_default_demands()
        else:
            new_demands = get_scenario_demands()

        # iterate over all activities demand_key is connected to
        for key, amt in new_demands.items():

            # recalculate for this demand
            _lca.redo_lci({key: amt})
            _lca.redo_lcia()

            score = _lca.score
            if score != 0:
                # only store non-zero results
                data[key] = score
            remainder -= score  # subtract this from remainder

        data[demand_key] = remainder
        return data

    def data_to_df(self, all_data: List[Tuple[object, dict]], compare: str) -> pd.DataFrame:
        """Convert the provided data into a dataframe.

        `all_data` is a list of (item, contribution-dict) pairs produced by
        `calculate_contributions`; `compare` determines how column names are
        derived from each item.
        """
        unique_keys = []
        # get all the unique keys:
        d = {"index": ["Total"], "reference product": [""], "name": [""],
             "location": [""], "unit": [""], "database": [""]}
        meta_cols = set(d.keys())
        for i, (item, data) in enumerate(all_data):
            unique_keys += list(data.keys())
            # already add the total with right column formatting depending on compares
            if compare == "Reference Flows":
                col_name = self.metadata_to_index(self.key_to_metadata(item))
            elif compare == "Impact Categories":
                col_name = self.metadata_to_index(list(item))
            elif compare == "Scenarios":
                col_name = item

            self.cache["totals"][col_name] = data["Total"]
            if self.relative:
                d[col_name] = [1]
            else:
                d[col_name] = [data["Total"]]

            all_data[i] = item, data, col_name
        unique_keys = set(unique_keys)

        # convert to dict format to feed into dataframe
        for key in unique_keys:
            if key == "Total":
                continue
            # get metadata
            metadata = self.key_to_metadata(key)
            d["index"].append(self.metadata_to_index(metadata))
            d["reference product"].append(metadata[0])
            d["name"].append(metadata[1])
            d["location"].append(metadata[2])
            d["unit"].append(metadata[3])
            d["database"].append(metadata[4])
            # check for each dataset if we have values, otherwise add np.nan
            for item, data, col_name in all_data:
                if val := data.get(key, False):
                    if self.relative:
                        value = val / self.cache["totals"][col_name]
                    else:
                        value = val
                else:
                    value = np.nan
                d[col_name].append(value)
        df = pd.DataFrame(d)
        check_cols = list(set(df.columns) - meta_cols)
        df = df.dropna(subset=check_cols, how="all")
        # TODO sort like https://github.com/LCA-ActivityBrowser/activity-browser/issues/887
        # Temporary sorting solution: sort on the last data column. NOTE: sort_values
        # returns a new frame (it is not in-place), so the result must be assigned.
        # Guard against empty `all_data`, in which case no column name was derived.
        if all_data:
            df = df.sort_values(by=col_name, ascending=False)
        return df

    def key_to_metadata(self, key: tuple) -> list:
        """Convert the key information to list with metadata.

        format:
        [reference product, activity name, location, unit, database]
        """
        return list(AB_metadata.get_metadata([key], ["reference product", "name", "location", "unit"]).iloc[0]) + [key[0]]

    def metadata_to_index(self, data: list) -> str:
        """Convert list to formatted index.

        format:
        reference product | activity name | location | unit | database
        """
        return " | ".join(data)

    def build_combobox(
        self, has_method: bool = True, has_func: bool = False
    ) -> QHBoxLayout:
        """Build the combobox row, pre-populating the aggregation choices."""
        self.combobox_menu.agg.addItems(
            self.parent.contributions.DEFAULT_ACT_AGGREGATES
        )
        return super().build_combobox(has_method, has_func)


class CorrelationsTab(NewAnalysisTab):
def __init__(self, parent):
super().__init__(parent)
Expand Down
Loading