Add feather and CSV SDF export options.

polca · Jul 27, 2023 · 7a361bc · 7a361bc
1 parent b1763b0
commit 7a361bc
Show file tree

Hide file tree

Showing 9 changed files with 65 additions and 36 deletions.
diff --git a/conda/meta.yaml b/conda/meta.yaml
@@ -34,6 +34,7 @@ requirements:
     - requests
     - bottleneck
     - constructive_geometries>=0.8.2
+    - pyarrow
 
 test:
   imports:

diff --git a/dev/profile.prof b/dev/profile.prof
diff --git a/dev/test.py b/dev/test.py
@@ -2,7 +2,7 @@
 
 from premise import *
 
-bw.projects.set_current("new")
+bw.projects.set_current("ei39")
 
 scenarios = [
     {"model": "remind", "pathway": "SSP1-Base", "year": 2005},
@@ -12,8 +12,8 @@
 
 ndb = NewDatabase(
     scenarios=scenarios,
-    source_db="ecoinvent 3.8 cutoff",
-    source_version="3.8",
+    source_db="ecoinvent 3.9.1 cutoff",
+    source_version="3.9",
     key=b"tUePmX_S5B8ieZkkM7WUU2CnO8SmShwmAeWK9x2rTFo=",
 )
 

diff --git a/premise/__init__.py b/premise/__init__.py
@@ -1,5 +1,5 @@
 __all__ = ("NewDatabase", "clear_cache", "get_regions_definition")
-__version__ = (1, 5, 8)
+__version__ = (1, 5, 9)
 
 from pathlib import Path
 

diff --git a/premise/ecoinvent_modification.py b/premise/ecoinvent_modification.py
@@ -1122,11 +1122,14 @@ def update_all(self) -> None:
         self.update_emissions()
 
     def write_superstructure_db_to_brightway(
-        self, name: str = f"super_db_{date.today()}", filepath: str = None
+        self, name: str = f"super_db_{date.today()}", filepath: str = None, format: str = "excel"
     ) -> None:
         """
         Register a super-structure database,
         according to https://github.com/dgdekoning/brightway-superstructure
+        :param name: name of the super-structure database
+        :param filepath: filepath of the "scenarios difference file"
+        :param format: format of the "scenarios difference file" export. Can be "excel", "csv" or "feather".
         :return: filepath of the "scenarios difference file"
         """
 
@@ -1153,6 +1156,7 @@ def write_superstructure_db_to_brightway(
             db_name=name,
             filepath=filepath,
             version=self.version,
+            format=format,
         )
         self.database = check_amount_format(self.database)
 

diff --git a/premise/electricity.py b/premise/electricity.py
@@ -1434,11 +1434,16 @@ def create_biomass_markets(self) -> None:
                 ],
             }
 
+            available_biomass_vars = [
+                v for v in list(biomass_map.keys())
+                if v in self.iam_data.production_volumes.variables.values
+            ]
+
             for biomass_type, biomass_act in biomass_map.items():
                 total_prod_vol = np.clip(
                     (
                         self.iam_data.production_volumes.sel(
-                            variables=list(biomass_map.keys()), region=region
+                            variables=available_biomass_vars, region=region
                         )
                         .interp(year=self.year)
                         .sum(dim="variables")
@@ -1447,24 +1452,22 @@ def create_biomass_markets(self) -> None:
                     None,
                 )
 
-                share = np.clip(
-                    (
-                        self.iam_data.production_volumes.sel(
-                            variables=biomass_type, region=region
-                        )
-                        .interp(year=self.year)
-                        .sum()
-                        / total_prod_vol
-                    ).values.item(0),
-                    0,
-                    1,
-                )
+                if biomass_type in available_biomass_vars:
 
-                if not share:
-                    if biomass_type == "biomass - residual":
-                        share = 1
-                    else:
-                        share = 0
+                    share = np.clip(
+                        (
+                            self.iam_data.production_volumes.sel(
+                                variables=biomass_type, region=region
+                            )
+                            .interp(year=self.year)
+                            .sum()
+                            / total_prod_vol
+                        ).values.item(0),
+                        0,
+                        1,
+                    )
+                else:
+                    share = 0
 
                 if share > 0:
                     ecoinvent_regions = self.geo.iam_to_ecoinvent_location(

diff --git a/premise/export.py b/premise/export.py
@@ -819,19 +819,29 @@ def generate_scenario_difference_file(
     df.loc[df["flow type"] == "technosphere", "from categories"] = None
     df.loc[df["flow type"] == "production", "from categories"] = None
 
+    # all exchanges of type production must have the value 1.0 in each scenario
+    df.loc[df["flow type"] == "production", list_scenarios] = 1.0
+
     # return the dataframe and the new db
     return df, new_db, list_acts
 
 
 def generate_superstructure_db(
-    origin_db, scenarios, db_name, filepath, version
+    origin_db,
+    scenarios,
+    db_name,
+    filepath,
+    version,
+    format="excel"
 ) -> List[dict]:
     """
     Build a superstructure database from a list of databases
     :param origin_db: the original database
     :param scenarios: a list of modified databases
     :param db_name: the name of the new database
     :param filepath: the filepath of the new database
+    :param version: the version of the new database
+    :param format: the format of the scenario difference file. Can be "excel", "csv" or "feather".
     :return: a superstructure database
     """
 
@@ -868,17 +878,26 @@ def generate_superstructure_db(
     after = len(df)
     print(f"Dropped {before - after} duplicate(s).")
 
-    filepath_sdf = filepath / f"scenario_diff_{db_name}.xlsx"
-    try:
-        df.to_excel(filepath_sdf, index=False)
-    except ValueError:
-        # from https://stackoverflow.com/questions/66356152/splitting-a-dataframe-into-multiple-sheets
-        GROUP_LENGTH = 1000000  # set nr of rows to slice df
-        with pd.ExcelWriter(filepath_sdf) as writer:
-            for i in range(0, len(df), GROUP_LENGTH):
-                df[i : i + GROUP_LENGTH].to_excel(
-                    writer, sheet_name=f"Row {i}", index=False, header=True
-                )
+    if format == "excel":
+        filepath_sdf = filepath / f"scenario_diff_{db_name}.xlsx"
+        try:
+            df.to_excel(filepath_sdf, index=False)
+        except ValueError:
+            # from https://stackoverflow.com/questions/66356152/splitting-a-dataframe-into-multiple-sheets
+            GROUP_LENGTH = 1000000  # set nr of rows to slice df
+            with pd.ExcelWriter(filepath_sdf) as writer:
+                for i in range(0, len(df), GROUP_LENGTH):
+                    df[i : i + GROUP_LENGTH].to_excel(
+                        writer, sheet_name=f"Row {i}", index=False, header=True
+                    )
+    elif format == "csv":
+        filepath_sdf = filepath / f"scenario_diff_{db_name}.csv"
+        df.to_csv(filepath_sdf, index=False, sep=";")
+    elif format == "feather":
+        filepath_sdf = filepath / f"scenario_diff_{db_name}.feather"
+        df.to_feather(filepath_sdf)
+    else:
+        raise ValueError(f"Unknown format {format}")
 
     print(f"Scenario difference file exported to {filepath}!")
 

diff --git a/requirements.txt b/requirements.txt
@@ -15,3 +15,4 @@ datapackage
 requests
 bottleneck
 constructive_geometries>=0.8.2
+pyarrow
diff --git a/setup.py b/setup.py
@@ -32,7 +32,7 @@ def package_files(directory):
 
 setup(
     name="premise",
-    version="1.5.8",
+    version="1.5.9",
     python_requires=">=3.9,<3.11",
     packages=packages,
     author="Romain Sacchi <[email protected]>, Alois Dirnaichner <[email protected]>, Chris Mutel "
@@ -58,6 +58,7 @@ def package_files(directory):
         "requests",
         "bottleneck",
         "constructive_geometries>=0.8.2",
+        "pyarrow"
     ],
     url="https://github.com/polca/premise",
     description="Coupling IAM output to ecoinvent LCA database ecoinvent for prospective LCA",