diff --git a/dev/Untitled1.ipynb b/dev/Untitled1.ipynb index 85f5cea5..581fd44e 100644 --- a/dev/Untitled1.ipynb +++ b/dev/Untitled1.ipynb @@ -4175,7 +4175,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.8" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/premise/clean_datasets.py b/premise/clean_datasets.py index 2c5f8bdc..d6b1dc46 100644 --- a/premise/clean_datasets.py +++ b/premise/clean_datasets.py @@ -138,6 +138,42 @@ def remove_categories(database: List[dict]) -> List[dict]: return database +def strip_string_from_spaces(database: List[dict]) -> List[dict]: + """ + Strip strings from spaces in the dataset of the wurst inventory database. + Modifies in place (does not return anything). + + :param database: wurst inventory database + :type database: list + + """ + for dataset in database: + dataset["name"] = dataset["name"].strip() + # also check for unicode characters like \xa0 + dataset["name"] = dataset["name"].replace("\xa0", "") + + dataset["reference product"] = dataset["reference product"].strip() + dataset["location"] = dataset["location"].strip() + for exc in dataset["exchanges"]: + exc["name"] = exc["name"].strip() + # also check for unicode characters like \xa0 + exc["name"] = exc["name"].replace("\xa0", "") + if exc.get("product"): + exc["product"] = exc["product"].strip() + # also check for unicode characters like \xa0 + exc["product"] = exc["product"].replace("\xa0", "") + if exc.get("reference product"): + exc["reference product"] = exc["reference product"].strip() + # also check for unicode characters like \xa0 + exc["reference product"] = exc["reference product"].replace("\xa0", "") + if exc.get("location"): + exc["location"] = exc["location"].strip() + if exc.get("unit"): + exc["unit"] = exc["unit"].strip() + + return database + + class DatabaseCleaner: """ Class that cleans the datasets contained in the inventory database for further processing. 
@@ -163,6 +199,8 @@ def __init__( ) self.database = wurst.extract_brightway2_databases(source_db) self.database = remove_categories(self.database) + # strip strings from spaces + self.database = strip_string_from_spaces(self.database) if source_type == "ecospold": # The ecospold data needs to be formatted @@ -171,6 +209,9 @@ def __init__( ) ecoinvent.apply_strategies() self.database = ecoinvent.data + # strip strings from spaces + self.database = strip_string_from_spaces(self.database) + # Location field is added to exchanges self.add_location_field_to_exchanges() # Product field is added to exchanges diff --git a/premise/data/additional_inventories/lci-battery-capacity.xlsx b/premise/data/additional_inventories/lci-battery-capacity.xlsx index a71e025b..52b7d047 100644 Binary files a/premise/data/additional_inventories/lci-battery-capacity.xlsx and b/premise/data/additional_inventories/lci-battery-capacity.xlsx differ diff --git a/premise/data/additional_inventories/lci-biofuels.xlsx b/premise/data/additional_inventories/lci-biofuels.xlsx index b0512c29..5b517256 100644 Binary files a/premise/data/additional_inventories/lci-biofuels.xlsx and b/premise/data/additional_inventories/lci-biofuels.xlsx differ diff --git a/premise/data/additional_inventories/lci-buses.xlsx b/premise/data/additional_inventories/lci-buses.xlsx index 1784e9dc..89935dbf 100644 Binary files a/premise/data/additional_inventories/lci-buses.xlsx and b/premise/data/additional_inventories/lci-buses.xlsx differ diff --git a/premise/data/additional_inventories/lci-carbon-capture.xlsx b/premise/data/additional_inventories/lci-carbon-capture.xlsx index 764cc376..478169f0 100644 Binary files a/premise/data/additional_inventories/lci-carbon-capture.xlsx and b/premise/data/additional_inventories/lci-carbon-capture.xlsx differ diff --git a/premise/data/additional_inventories/lci-hydrogen-distribution.xlsx b/premise/data/additional_inventories/lci-hydrogen-distribution.xlsx index c65df382..21ea602b 100644 
Binary files a/premise/data/additional_inventories/lci-hydrogen-distribution.xlsx and b/premise/data/additional_inventories/lci-hydrogen-distribution.xlsx differ diff --git a/premise/data/additional_inventories/lci-hydrogen-electrolysis.xlsx b/premise/data/additional_inventories/lci-hydrogen-electrolysis.xlsx index 45ea460a..5f3e5471 100644 Binary files a/premise/data/additional_inventories/lci-hydrogen-electrolysis.xlsx and b/premise/data/additional_inventories/lci-hydrogen-electrolysis.xlsx differ diff --git a/premise/data/additional_inventories/lci-pass_cars.xlsx b/premise/data/additional_inventories/lci-pass_cars.xlsx index 065890c9..fbe885ed 100644 Binary files a/premise/data/additional_inventories/lci-pass_cars.xlsx and b/premise/data/additional_inventories/lci-pass_cars.xlsx differ diff --git a/premise/data/additional_inventories/lci-trucks.xlsx b/premise/data/additional_inventories/lci-trucks.xlsx index 57e2cafe..3a6a3a56 100644 Binary files a/premise/data/additional_inventories/lci-trucks.xlsx and b/premise/data/additional_inventories/lci-trucks.xlsx differ diff --git a/premise/data/additional_inventories/lci-two_wheelers.xlsx b/premise/data/additional_inventories/lci-two_wheelers.xlsx index 2e4ae50c..52991c4e 100644 Binary files a/premise/data/additional_inventories/lci-two_wheelers.xlsx and b/premise/data/additional_inventories/lci-two_wheelers.xlsx differ diff --git a/premise/electricity.py b/premise/electricity.py index 703b8bb2..134be54c 100644 --- a/premise/electricity.py +++ b/premise/electricity.py @@ -1550,7 +1550,7 @@ def create_region_specific_power_plants(self): ws.either( *[ws.contains("name", name) for name in list_datasets_to_duplicate] ), - # ws.exclude(ws.contains("name", "market")), + ws.exclude(ws.contains("name", "market")), ws.exclude(ws.contains("name", ", oxy, ")), ws.exclude(ws.contains("name", ", pre, ")), ): diff --git a/premise/export.py b/premise/export.py index 61a5f9f4..0869fb8b 100644 --- a/premise/export.py +++ 
b/premise/export.py @@ -960,9 +960,7 @@ def check_geographical_linking(scenario, original_database): return scenario -def prepare_db_for_export( - scenario, name, original_database, keep_uncertainty_data=False, biosphere_name=None -): +def prepare_db_for_export(scenario, name, original_database, biosphere_name=None): """ Prepare a database for export. """ @@ -979,7 +977,6 @@ def prepare_db_for_export( original_database=original_database, database=scenario["database"], db_name=name, - keep_uncertainty_data=keep_uncertainty_data, biosphere_name=biosphere_name, ) validator.run_all_checks() @@ -991,7 +988,6 @@ def _prepare_database( scenario, db_name, original_database, - keep_uncertainty_data, biosphere_name, ): @@ -999,7 +995,6 @@ def _prepare_database( scenario, name=db_name, original_database=original_database, - keep_uncertainty_data=keep_uncertainty_data, biosphere_name=biosphere_name, ) @@ -1169,6 +1164,10 @@ def create_B_matrix_coordinates(self): "Cannot find the biosphere flow", exc["name"], exc["categories"], + "in ", + ds["name"], + ds["reference product"], + ds["location"], ) row = () list_rows.append(row) diff --git a/premise/iam_variables_mapping/electricity_variables.yaml b/premise/iam_variables_mapping/electricity_variables.yaml index 32c0b15a..71189267 100644 --- a/premise/iam_variables_mapping/electricity_variables.yaml +++ b/premise/iam_variables_mapping/electricity_variables.yaml @@ -902,7 +902,7 @@ Solar PV Residential: Storage, Battery: ecoinvent_aliases: fltr: - - market for battery capacity, stationary (CONT scenario) + - electricity supply, from stationary battery (CONT scenario) iam_aliases: image: Secondary Energy|Electricity|Storage message: Secondary Energy|Electricity|Storage diff --git a/premise/inventory_imports.py b/premise/inventory_imports.py index 6bc2bb73..94ea7a07 100644 --- a/premise/inventory_imports.py +++ b/premise/inventory_imports.py @@ -262,22 +262,18 @@ def check_uncertainty_data(data, filename): exc["uncertainty type"] = 0 
if exc["uncertainty type"] not in {0, 1}: - if not all( - f in exc + missing_parameters = [ + f for f in MANDATORY_UNCERTAINTY_FIELDS[exc["uncertainty type"]] - ): + if exc.get(f) is None + ] + if missing_parameters: rows.append( [ dataset["name"][:30], exc["name"][:30], exc["uncertainty type"], - [ - f - for f in MANDATORY_UNCERTAINTY_FIELDS[ - exc["uncertainty type"] - ] - if f not in exc - ], + missing_parameters, ] ) @@ -294,7 +290,11 @@ def check_uncertainty_data(data, filename): ] ) - if not exc["minimum"] <= exc["loc"] <= exc["maximum"]: + if ( + not exc.get("minimum", 0) + <= exc.get("loc", 0) + <= exc.get("maximum", 0) + ): rows.append( [ dataset["name"][:30], @@ -876,7 +876,7 @@ def prepare_inventory(self) -> None: # Remove uncertainty data if not self.keep_uncertainty_data: print("Remove uncertainty data.") - self.database = remove_uncertainty(self.database) + self.import_db.data = remove_uncertainty(self.import_db.data) else: check_uncertainty_data(self.import_db.data, filename=Path(self.path).stem) diff --git a/premise/new_database.py b/premise/new_database.py index 77882e70..7558458f 100644 --- a/premise/new_database.py +++ b/premise/new_database.py @@ -512,7 +512,8 @@ def __init__( use_cached_database: bool = True, external_scenarios: list = None, quiet=False, - keep_uncertainty_data=False, + keep_imports_uncertainty=False, + keep_source_db_uncertainty=False, gains_scenario="CLE", use_absolute_efficiency=False, biosphere_name: str = "biosphere3", @@ -523,7 +524,8 @@ def __init__( self.system_model = check_system_model(system_model) self.system_model_args = system_args self.use_absolute_efficiency = use_absolute_efficiency - self.keep_uncertainty_data = keep_uncertainty_data + self.keep_imports_uncertainty = keep_imports_uncertainty + self.keep_source_db_uncertainty = keep_source_db_uncertainty self.biosphere_name = check_presence_biosphere_database(biosphere_name) # if version is anything other than 3.8 or 3.9 @@ -628,7 +630,9 @@ def 
__find_cached_db(self, db_name: str) -> List[dict]: db_name = f"ecospold_{self.system_model}_{self.version}" uncertainty_data = ( - "w_uncertainty" if self.keep_uncertainty_data is True else "wo_uncertainty" + "w_uncertainty" + if self.keep_source_db_uncertainty is True + else "wo_uncertainty" ) file_name = ( @@ -661,7 +665,9 @@ def __find_cached_inventories(self, db_name: str) -> Union[None, List[dict]]: db_name = f"ecospold_{self.system_model}_{self.version}" uncertainty_data = ( - "w_uncertainty" if self.keep_uncertainty_data is True else "wo_uncertainty" + "w_uncertainty" + if self.keep_imports_uncertainty is True + else "wo_uncertainty" ) file_name = ( @@ -694,7 +700,7 @@ def __clean_database(self) -> List[dict]: """ return DatabaseCleaner( self.source, self.source_type, self.source_file_path, self.version - ).prepare_datasets(self.keep_uncertainty_data) + ).prepare_datasets(self.keep_source_db_uncertainty) def __import_inventories(self) -> List[dict]: """ @@ -806,7 +812,7 @@ def __import_inventories(self) -> List[dict]: version_out=self.version, path=filepath[0], system_model=self.system_model, - keep_uncertainty_data=self.keep_uncertainty_data, + keep_uncertainty_data=self.keep_imports_uncertainty, ) datasets = inventory.merge_inventory() data.extend(datasets) @@ -999,7 +1005,6 @@ def write_superstructure_db_to_brightway( scenario=scenario, db_name=name, original_database=self.database, - keep_uncertainty_data=self.keep_uncertainty_data, biosphere_name=self.biosphere_name, ) @@ -1023,7 +1028,6 @@ def write_superstructure_db_to_brightway( scenario=tmp_scenario, name="database", original_database=self.database, - keep_uncertainty_data=self.keep_uncertainty_data, biosphere_name=self.biosphere_name, ) @@ -1091,7 +1095,6 @@ def write_db_to_brightway(self, name: [str, List[str]] = None): scenario=scenario, db_name=name[s], original_database=self.database, - keep_uncertainty_data=self.keep_uncertainty_data, biosphere_name=self.biosphere_name, ) 
write_brightway_database( @@ -1166,7 +1169,6 @@ def scenario_name(scenario): scenario=scenario, db_name="database", original_database=self.database, - keep_uncertainty_data=self.keep_uncertainty_data, biosphere_name=self.biosphere_name, ) Export(scenario, filepath[s], self.version).export_db_to_matrices() @@ -1207,7 +1209,6 @@ def write_db_to_simapro(self, filepath: str = None): scenario=scenario, db_name="database", original_database=self.database, - keep_uncertainty_data=self.keep_uncertainty_data, biosphere_name=self.biosphere_name, ) export = Export(scenario, filepath, self.version) @@ -1254,7 +1255,6 @@ def write_db_to_olca(self, filepath: str = None): scenario=scenario, db_name="database", original_database=self.database, - keep_uncertainty_data=self.keep_uncertainty_data, biosphere_name=self.biosphere_name, ) Export(scenario, filepath, self.version).export_db_to_simapro( @@ -1295,7 +1295,6 @@ def write_datapackage( scenario=scenario, db_name=name, original_database=self.database, - keep_uncertainty_data=self.keep_uncertainty_data, biosphere_name=self.biosphere_name, ) diff --git a/premise/transformation.py b/premise/transformation.py index 716e42ce..ed40dfc3 100644 --- a/premise/transformation.py +++ b/premise/transformation.py @@ -1057,6 +1057,37 @@ def relink_datasets(self, excludes_datasets=None, alt_names=None): if len(excs_to_relink) == 0: continue + old_uncertainty = {} + + for exc in excs_to_relink: + if exc["type"] == "technosphere": + if exc.get("uncertainty type", 0) != 0: + old_uncertainty[ + (exc["name"], exc.get("product"), exc["unit"]) + ] = { + "uncertainty type": exc.get("uncertainty type", 0), + "loc": ( + exc.get("loc", 0) / exc["amount"] + if exc.get("loc") + else None + ), + "scale": ( + exc.get("scale", 0) / exc["amount"] + if exc.get("scale") + else None + ), + "minimum": ( + exc.get("minimum", 0) / exc["amount"] + if exc.get("minimum") + else None + ), + "maximum": ( + exc.get("maximum", 0) / exc["amount"] + if exc.get("maximum") + 
else None + ), + } + # make a dictionary with the names and amounts # of the technosphere exchanges to relink # to compare with the new exchanges @@ -1085,6 +1116,19 @@ def relink_datasets(self, excludes_datasets=None, alt_names=None): act, unique_excs_to_relink, alt_names ) + # apply uncertainties, if any + if old_uncertainty: + for exc in new_exchanges: + key = (exc["name"], exc["product"], exc["unit"]) + if key in old_uncertainty: + exc["uncertainty type"] = old_uncertainty[key][ + "uncertainty type" + ] + for k, v in old_uncertainty[key].items(): + if k != "uncertainty type": + if v is not None: + exc[k] = v * exc["amount"] + # Update act["exchanges"] by removing the exchanges to relink act["exchanges"] = [e for e in act["exchanges"] if e not in excs_to_relink] # Update act["exchanges"] by adding new exchanges @@ -1574,6 +1618,12 @@ def process_cached_exchange( "location": i[2], "type": "technosphere", "amount": exchange["amount"] * i[-1], + "uncertainty type": exchange.get("uncertainty type", 0), + "loc": exchange.get("loc", 0) * i[-1], + "scale": exchange.get("scale", 0) * i[-1], + "negative": exchange.get("negative", False), + "minimum": exchange.get("minimum", 0) * i[-1], + "maximum": exchange.get("maximum", 0) * i[-1], } for i in exchanges ] @@ -1634,6 +1684,12 @@ def process_uncached_exchange( "location": dataset["location"], "type": "technosphere", "amount": exchange["amount"], + "uncertainty type": exchange.get("uncertainty type", 0), + "loc": exchange.get("loc", None), + "scale": exchange.get("scale", None), + "negative": exchange.get("negative", False), + "minimum": exchange.get("minimum", None), + "maximum": exchange.get("maximum", None), } ] @@ -1679,6 +1735,12 @@ def new_exchange(self, exchange, location, amount_multiplier): "location": location, "type": "technosphere", "amount": exchange["amount"] * amount_multiplier, + "uncertainty type": exchange.get("uncertainty type", 0), + "loc": exchange.get("loc", None), + "scale": exchange.get("scale", 
None), + "negative": exchange.get("negative", False), + "minimum": exchange.get("minimum", None), + "maximum": exchange.get("maximum", None), } def handle_multiple_possible_datasets( @@ -1923,6 +1985,35 @@ def relink_technosphere_exchanges( if exc["type"] == "technosphere": exchanges_before[exc["product"]] += exc["amount"] + old_uncertainty = {} + + for exc in dataset["exchanges"]: + if exc["type"] == "technosphere": + if exc.get("uncertainty type", 0) != 0: + old_uncertainty[(exc["name"], exc.get("product"), exc["unit"])] = { + "uncertainty type": exc.get("uncertainty type", 0), + "loc": ( + exc.get("loc", 0) / exc["amount"] + if exc.get("loc") + else None + ), + "scale": ( + exc.get("scale", 0) / exc["amount"] + if exc.get("scale") + else None + ), + "minimum": ( + exc.get("minimum", 0) / exc["amount"] + if exc.get("minimum") + else None + ), + "maximum": ( + exc.get("maximum", 0) / exc["amount"] + if exc.get("maximum") + else None + ), + } + new_exchanges = self.find_candidates( dataset, exclusive=exclusive, @@ -1943,14 +2034,41 @@ def relink_technosphere_exchanges( "type": "technosphere", "amount": sum(exc["amount"] for exc in exchanges), } - for (name, prod, location, unit), exchanges in groupby( + for ( + name, + prod, + location, + unit, + ), exchanges in groupby( sorted( - new_exchanges, key=itemgetter("name", "product", "location", "unit") + new_exchanges, + key=itemgetter( + "name", + "product", + "location", + "unit", + ), + ), + key=itemgetter( + "name", + "product", + "location", + "unit", ), - key=itemgetter("name", "product", "location", "unit"), ) ] + # apply uncertainties, if any + if old_uncertainty: + for exc in new_exchanges: + key = (exc["name"], exc["product"], exc["unit"]) + if key in old_uncertainty: + exc["uncertainty type"] = old_uncertainty[key]["uncertainty type"] + for k, v in old_uncertainty[key].items(): + if k != "uncertainty type": + if v is not None: + exc[k] = v * exc["amount"] + dataset["exchanges"] = [ exc for exc in 
dataset["exchanges"] if exc["type"] != "technosphere" ] + new_exchanges diff --git a/premise/utils.py b/premise/utils.py index bd3954d9..62c5de95 100644 --- a/premise/utils.py +++ b/premise/utils.py @@ -308,7 +308,9 @@ def hide_messages(): """ print("Keep uncertainty data?") - print("NewDatabase(..., keep_uncertainty_data=True)") + print( + "NewDatabase(..., keep_source_db_uncertainty=True, keep_imports_uncertainty=True)" + ) print("") print("Hide these messages?") print("NewDatabase(..., quiet=True)") diff --git a/premise/validation.py b/premise/validation.py index 6841eeb5..18fea43c 100644 --- a/premise/validation.py +++ b/premise/validation.py @@ -151,7 +151,6 @@ def __init__( database, original_database=None, db_name=None, - keep_uncertainty_data=False, biosphere_name=None, ): self.original_database = original_database @@ -164,7 +163,6 @@ def __init__( self.geo = Geomap(model) self.minor_issues_log = [] self.major_issues_log = [] - self.keep_uncertainty_data = keep_uncertainty_data self.biosphere_name = biosphere_name def check_matrix_squareness(self): @@ -202,18 +200,19 @@ def check_uncertainty(self): 12: {"loc", "scale", "shape"}, } - if self.keep_uncertainty_data is True: - for ds in self.database: - for exc in ds["exchanges"]: - if int(exc.get("uncertainty type", 0)) not in [0, 1]: - if not all( - f in exc - for f in MANDATORY_UNCERTAINTY_FIELDS[ - int(exc["uncertainty type"]) - ] - ): - message = f"Exchange {exc['name']} has incomplete uncertainty data." - self.log_issue(ds, "incomplete uncertainty data", message) + for ds in self.database: + for exc in ds["exchanges"]: + if int(exc.get("uncertainty type", 0)) not in [0, 1]: + if not all( + f in exc + for f in MANDATORY_UNCERTAINTY_FIELDS[ + int(exc["uncertainty type"]) + ] + ): + message = ( + f"Exchange {exc['name']} has incomplete uncertainty data." + ) + self.log_issue(ds, "incomplete uncertainty data", message) def check_datasets_integrity(self): # Verify no unintended loss of datasets