From a2794faa73f6331cc7dc41c9e3515ac81df3d3ca Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Thu, 26 Jan 2023 16:25:17 +0100 Subject: [PATCH 1/8] fixed missing clipping for p_min_pu and p_max_pu --- src/egon/data/datasets/DSM_cts_ind.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/egon/data/datasets/DSM_cts_ind.py b/src/egon/data/datasets/DSM_cts_ind.py index 06a791181..5fbc5dd72 100644 --- a/src/egon/data/datasets/DSM_cts_ind.py +++ b/src/egon/data/datasets/DSM_cts_ind.py @@ -526,14 +526,14 @@ def calculate_potentials(s_flex, s_util, s_inc, s_dec, delta_t, dsm): p_max = scheduled_load.copy() for index, liste in scheduled_load.items(): lamb = lam.loc[index] - p_max.loc[index] = [lamb * s_inc - item for item in liste] + p_max.loc[index] = [max(0, lamb * s_inc - item) for item in liste] # P_min p_min = scheduled_load.copy() for index, liste in scheduled_load.items(): lamb = lam.loc[index] - p_min.loc[index] = [-(item - lamb * s_dec) for item in liste] + p_min.loc[index] = [min(0, -(item - lamb * s_dec)) for item in liste] # calculation of E_max and E_min @@ -959,8 +959,10 @@ def delete_dsm_entries(carrier): # buses - sql = f"""DELETE FROM {targets["bus"]["schema"]}.{targets["bus"]["table"]} b - WHERE (b.carrier LIKE '{carrier}');""" + sql = f""" + DELETE FROM {targets["bus"]["schema"]}.{targets["bus"]["table"]} b + WHERE (b.carrier LIKE '{carrier}'); + """ db.execute_sql(sql) # links From ad5a7756a8df6a5e9f62cfa2a5aaa3270e585d59 Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Thu, 26 Jan 2023 16:30:23 +0100 Subject: [PATCH 2/8] minor formatting --- src/egon/data/datasets/DSM_cts_ind.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/egon/data/datasets/DSM_cts_ind.py b/src/egon/data/datasets/DSM_cts_ind.py index 5fbc5dd72..1a1b60791 100644 --- a/src/egon/data/datasets/DSM_cts_ind.py +++ b/src/egon/data/datasets/DSM_cts_ind.py @@ -532,7 +532,6 @@ def calculate_potentials(s_flex, s_util, s_inc, s_dec, delta_t, 
dsm): p_min = scheduled_load.copy() for index, liste in scheduled_load.items(): lamb = lam.loc[index] - p_min.loc[index] = [min(0, -(item - lamb * s_dec)) for item in liste] # calculation of E_max and E_min From ad76d8cd8243c245167537b6a8f764433da3105f Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Wed, 1 Feb 2023 09:52:49 +0100 Subject: [PATCH 3/8] adapted tests to respect small differences between individual and aggregated time series due to clipping at zero --- src/egon/data/datasets/DSM_cts_ind.py | 2 +- src/egon/data/datasets/sanity_checks.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/egon/data/datasets/DSM_cts_ind.py b/src/egon/data/datasets/DSM_cts_ind.py index 1a1b60791..10cd7b8c6 100644 --- a/src/egon/data/datasets/DSM_cts_ind.py +++ b/src/egon/data/datasets/DSM_cts_ind.py @@ -77,7 +77,7 @@ class DsmPotential(Dataset): def __init__(self, dependencies): super().__init__( name="DsmPotential", - version="0.0.4", + version="0.0.5.dev", dependencies=dependencies, tasks=(dsm_cts_ind_processing), ) diff --git a/src/egon/data/datasets/sanity_checks.py b/src/egon/data/datasets/sanity_checks.py index aedb9ad9c..5df0c68d3 100644 --- a/src/egon/data/datasets/sanity_checks.py +++ b/src/egon/data/datasets/sanity_checks.py @@ -1486,8 +1486,13 @@ def df_from_series(s: pd.Series): index=groups.keys(), ).T - assert np.isclose(p_max_df, individual_p_max_df).all() - assert np.isclose(p_min_df, individual_p_min_df).all() + # due to the fact that time series are clipped at zero (either + # direction) there is a little difference between the sum of the + # individual time series and the aggregated time series as the second + # is generated independent of the others. This makes atol=1e-03 + # necessary. 
+ assert np.allclose(p_max_df, individual_p_max_df, atol=1e-03) + assert np.allclose(p_min_df, individual_p_min_df, atol=1e-03) # e_min and e_max sql = f""" @@ -1537,5 +1542,5 @@ def df_from_series(s: pd.Series): index=groups.keys(), ).T - assert np.isclose(e_max_df, individual_e_max_df).all() - assert np.isclose(e_min_df, individual_e_min_df).all() + assert np.allclose(e_max_df, individual_e_max_df) + assert np.allclose(e_min_df, individual_e_min_df) From ab233eac4a5f7e668fb59bf6a0afa5f484fc91b9 Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Wed, 1 Feb 2023 17:35:48 +0100 Subject: [PATCH 4/8] saving correct p_set time series; saving absolute time series instead of relative --- src/egon/data/datasets/DSM_cts_ind.py | 105 ++++++++------------------ 1 file changed, 30 insertions(+), 75 deletions(-) diff --git a/src/egon/data/datasets/DSM_cts_ind.py b/src/egon/data/datasets/DSM_cts_ind.py index 10cd7b8c6..f91cb8d2a 100644 --- a/src/egon/data/datasets/DSM_cts_ind.py +++ b/src/egon/data/datasets/DSM_cts_ind.py @@ -94,13 +94,11 @@ class EgonEtragoElectricityCtsDsmTimeseries(Base): bus = Column(Integer, primary_key=True, index=True) scn_name = Column(String, primary_key=True, index=True) - p_nom = Column(Float) - e_nom = Column(Float) p_set = Column(ARRAY(Float)) - p_max_pu = Column(ARRAY(Float)) - p_min_pu = Column(ARRAY(Float)) - e_max_pu = Column(ARRAY(Float)) - e_min_pu = Column(ARRAY(Float)) + p_max = Column(ARRAY(Float)) + p_min = Column(ARRAY(Float)) + e_max = Column(ARRAY(Float)) + e_min = Column(ARRAY(Float)) class EgonOsmIndLoadCurvesIndividualDsmTimeseries(Base): @@ -114,13 +112,11 @@ class EgonOsmIndLoadCurvesIndividualDsmTimeseries(Base): osm_id = Column(Integer, primary_key=True, index=True) scn_name = Column(String, primary_key=True, index=True) bus = Column(Integer) - p_nom = Column(Float) - e_nom = Column(Float) p_set = Column(ARRAY(Float)) - p_max_pu = Column(ARRAY(Float)) - p_min_pu = Column(ARRAY(Float)) - e_max_pu = Column(ARRAY(Float)) - 
e_min_pu = Column(ARRAY(Float)) + p_max = Column(ARRAY(Float)) + p_min = Column(ARRAY(Float)) + e_max = Column(ARRAY(Float)) + e_min = Column(ARRAY(Float)) class EgonDemandregioSitesIndElectricityDsmTimeseries(Base): @@ -135,13 +131,11 @@ class EgonDemandregioSitesIndElectricityDsmTimeseries(Base): scn_name = Column(String, primary_key=True, index=True) bus = Column(Integer) application = Column(String) - p_nom = Column(Float) - e_nom = Column(Float) p_set = Column(ARRAY(Float)) - p_max_pu = Column(ARRAY(Float)) - p_min_pu = Column(ARRAY(Float)) - e_max_pu = Column(ARRAY(Float)) - e_min_pu = Column(ARRAY(Float)) + p_max = Column(ARRAY(Float)) + p_min = Column(ARRAY(Float)) + e_max = Column(ARRAY(Float)) + e_min = Column(ARRAY(Float)) class EgonSitesIndLoadCurvesIndividualDsmTimeseries(Base): @@ -155,13 +149,11 @@ class EgonSitesIndLoadCurvesIndividualDsmTimeseries(Base): site_id = Column(Integer, primary_key=True, index=True) scn_name = Column(String, primary_key=True, index=True) bus = Column(Integer) - p_nom = Column(Float) - e_nom = Column(Float) p_set = Column(ARRAY(Float)) - p_max_pu = Column(ARRAY(Float)) - p_min_pu = Column(ARRAY(Float)) - e_max_pu = Column(ARRAY(Float)) - e_min_pu = Column(ARRAY(Float)) + p_max = Column(ARRAY(Float)) + p_min = Column(ARRAY(Float)) + e_max = Column(ARRAY(Float)) + e_min = Column(ARRAY(Float)) # Code @@ -1308,46 +1300,6 @@ def dsm_cts_ind( data_export(dsm_buses, dsm_links, dsm_stores, carrier="dsm") -def get_p_nom_e_nom(df: pd.DataFrame): - p_nom = [ - max(max(val), max(abs(v) for v in df.p_min_pu.at[idx])) - for idx, val in df.p_max_pu.items() - ] - - e_nom = [ - max(max(val), max(abs(v) for v in df.e_min_pu.at[idx])) - for idx, val in df.e_max_pu.items() - ] - - return df.assign(p_nom=p_nom, e_nom=e_nom) - - -def calc_per_unit(df): - df = get_p_nom_e_nom(df) - - for col in ["p_max_pu", "p_min_pu"]: - rslt = [] - - for idx, lst in df[col].items(): - p_nom = df.p_nom.at[idx] - - rslt.append([v / p_nom for v in lst]) - - 
df[col] = rslt - - for col in ["e_max_pu", "e_min_pu"]: - rslt = [] - - for idx, lst in df[col].items(): - e_nom = df.e_nom.at[idx] - - rslt.append([v / e_nom for v in lst]) - - df[col] = rslt - - return df - - def create_table(df, table, engine=CON): """Create table""" table.__table__.drop(bind=engine, checkfirst=True) @@ -1362,6 +1314,10 @@ def create_table(df, table, engine=CON): ) +def div_list(lst: list, div: float): + return [v / div for v in lst] + + def dsm_cts_ind_individual( cts_cool_vent_ac_share=CTS_COOL_VENT_AC_SHARE, ind_vent_cool_share=IND_VENT_COOL_SHARE, @@ -1411,19 +1367,22 @@ def dsm_cts_ind_individual( dsm=dsm, ) + dsm = dsm.assign( + p_set=dsm.p_set.apply(div_list, div=cts_cool_vent_ac_share) + ) + base_columns = [ "bus", "scn_name", "p_set", - "p_max_pu", - "p_min_pu", - "e_max_pu", - "e_min_pu", + "p_max", + "p_min", + "e_max", + "e_min", ] cts_df = pd.concat([dsm, *vals], axis=1, ignore_index=True) cts_df.columns = base_columns - cts_df = calc_per_unit(cts_df) print(" ") print("industry per osm-area: cooling and ventilation") @@ -1442,11 +1401,12 @@ def dsm_cts_ind_individual( dsm=dsm, ) + dsm = dsm.assign(p_set=dsm.p_set.apply(div_list, div=ind_vent_cool_share)) + columns = ["osm_id"] + base_columns osm_df = pd.concat([dsm, *vals], axis=1, ignore_index=True) osm_df.columns = columns - osm_df = calc_per_unit(osm_df) # industry sites @@ -1486,7 +1446,6 @@ def dsm_cts_ind_individual( paper_df = pd.concat([dsm_paper, *vals], axis=1, ignore_index=True) paper_df.columns = columns - paper_df = calc_per_unit(paper_df) print(" ") print("industry sites: recycled paper") @@ -1511,7 +1470,6 @@ def dsm_cts_ind_individual( [dsm_recycled_paper, *vals], axis=1, ignore_index=True ) recycled_paper_df.columns = columns - recycled_paper_df = calc_per_unit(recycled_paper_df) print(" ") print("industry sites: pulp") @@ -1532,7 +1490,6 @@ def dsm_cts_ind_individual( pulp_df = pd.concat([dsm_pulp, *vals], axis=1, ignore_index=True) pulp_df.columns = columns - 
pulp_df = calc_per_unit(pulp_df) # industry sites: cement @@ -1555,7 +1512,6 @@ def dsm_cts_ind_individual( cement_df = pd.concat([dsm_cement, *vals], axis=1, ignore_index=True) cement_df.columns = columns - cement_df = calc_per_unit(cement_df) ind_df = pd.concat( [paper_df, recycled_paper_df, pulp_df, cement_df], ignore_index=True @@ -1590,7 +1546,6 @@ def dsm_cts_ind_individual( ind_sites_df = pd.concat([dsm, *vals], axis=1, ignore_index=True) ind_sites_df.columns = columns - ind_sites_df = calc_per_unit(ind_sites_df) # create tables create_table( From 6b3a5391720105a34d175b2f487a7a78c0c7bd02 Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Thu, 2 Feb 2023 14:59:59 +0100 Subject: [PATCH 5/8] adapted sanity checks to new structure --- src/egon/data/datasets/sanity_checks.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/egon/data/datasets/sanity_checks.py b/src/egon/data/datasets/sanity_checks.py index 5df0c68d3..e7ff05850 100644 --- a/src/egon/data/datasets/sanity_checks.py +++ b/src/egon/data/datasets/sanity_checks.py @@ -1453,7 +1453,7 @@ def df_from_series(s: pd.Series): for table in tables: target = targets[table] sql = f""" - SELECT bus, p_nom, e_nom, p_min_pu, p_max_pu, e_max_pu, e_min_pu + SELECT bus, p_min, p_max, e_max, e_min FROM {target["schema"]}.{target["table"]} WHERE scn_name = '{scenario}' ORDER BY bus @@ -1465,9 +1465,8 @@ def df_from_series(s: pd.Series): groups = individual_ts_df[["bus"]].reset_index().groupby("bus").groups - individual_p_max_df = df_from_series(individual_ts_df.p_max_pu).mul( - individual_ts_df.p_nom - ) + individual_p_max_df = df_from_series(individual_ts_df.p_max) + individual_p_max_df = pd.DataFrame( [ individual_p_max_df[idxs].sum(axis=1) @@ -1475,9 +1474,9 @@ def df_from_series(s: pd.Series): ], index=groups.keys(), ).T - individual_p_min_df = df_from_series(individual_ts_df.p_min_pu).mul( - individual_ts_df.p_nom - ) + + individual_p_min_df = 
df_from_series(individual_ts_df.p_min) + individual_p_min_df = pd.DataFrame( [ individual_p_min_df[idxs].sum(axis=1) @@ -1521,9 +1520,8 @@ def df_from_series(s: pd.Series): e_max_df.columns = meta_df.bus.tolist() e_min_df.columns = meta_df.bus.tolist() - individual_e_max_df = df_from_series(individual_ts_df.e_max_pu).mul( - individual_ts_df.e_nom - ) + individual_e_max_df = df_from_series(individual_ts_df.e_max) + individual_e_max_df = pd.DataFrame( [ individual_e_max_df[idxs].sum(axis=1) @@ -1531,9 +1529,8 @@ def df_from_series(s: pd.Series): ], index=groups.keys(), ).T - individual_e_min_df = df_from_series(individual_ts_df.e_min_pu).mul( - individual_ts_df.e_nom - ) + individual_e_min_df = df_from_series(individual_ts_df.e_min) + individual_e_min_df = pd.DataFrame( [ individual_e_min_df[idxs].sum(axis=1) From 01a3949f5d2a22fcf5d58d6ee3046d8c2056ae59 Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Thu, 2 Feb 2023 15:03:45 +0100 Subject: [PATCH 6/8] adapt dataset version and changelog --- CHANGELOG.rst | 2 ++ src/egon/data/datasets/DSM_cts_ind.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 9c76a8416..da49ef845 100755 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -679,6 +679,8 @@ Bug Fixes * Fix URL of TYNDP scenario dataset * Automatically generated tasks now get unique :code:`task_id`\s. Fixes issue `#985`_ via PR `#986`_. +* Fix faulty DSM time series + `#1088 <https://github.com/openego/eGon-data/issues/1088>`_ .. _PR #692: https://github.com/openego/eGon-data/pull/692 .. 
_#343: https://github.com/openego/eGon-data/issues/343 diff --git a/src/egon/data/datasets/DSM_cts_ind.py b/src/egon/data/datasets/DSM_cts_ind.py index f91cb8d2a..ebf157fa7 100644 --- a/src/egon/data/datasets/DSM_cts_ind.py +++ b/src/egon/data/datasets/DSM_cts_ind.py @@ -77,7 +77,7 @@ class DsmPotential(Dataset): def __init__(self, dependencies): super().__init__( name="DsmPotential", - version="0.0.5.dev", + version="0.0.5", dependencies=dependencies, tasks=(dsm_cts_ind_processing), ) From 6340b0ee3fd0da13fd212fdb3fb4e0c2100f68cf Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Thu, 16 Mar 2023 11:34:59 +0100 Subject: [PATCH 7/8] set higher tolerance --- src/egon/data/datasets/sanity_checks.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/egon/data/datasets/sanity_checks.py b/src/egon/data/datasets/sanity_checks.py index e7ff05850..859bcec5c 100644 --- a/src/egon/data/datasets/sanity_checks.py +++ b/src/egon/data/datasets/sanity_checks.py @@ -1488,10 +1488,11 @@ def df_from_series(s: pd.Series): # due to the fact that time series are clipped at zero (either # direction) there is a little difference between the sum of the # individual time series and the aggregated time series as the second - # is generated independent of the others. This makes atol=1e-03 + # is generated independent of the others. This makes atol=1e-01 # necessary. 
- assert np.allclose(p_max_df, individual_p_max_df, atol=1e-03) - assert np.allclose(p_min_df, individual_p_min_df, atol=1e-03) + atol = 1e-01 + assert np.allclose(p_max_df, individual_p_max_df, atol=atol) + assert np.allclose(p_min_df, individual_p_min_df, atol=atol) # e_min and e_max sql = f""" From 5d2bf3e4403bd4252a1499fcc8bbf7599c4fd877 Mon Sep 17 00:00:00 2001 From: Kilian Helfenbein Date: Thu, 16 Mar 2023 11:43:02 +0100 Subject: [PATCH 8/8] add notice about differences between individual and aggregated time series --- src/egon/data/datasets/DSM_cts_ind.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/egon/data/datasets/DSM_cts_ind.py b/src/egon/data/datasets/DSM_cts_ind.py index ebf157fa7..4990ccb9a 100644 --- a/src/egon/data/datasets/DSM_cts_ind.py +++ b/src/egon/data/datasets/DSM_cts_ind.py @@ -1,3 +1,13 @@ +""" +Currently, there are differences in the aggregated and individual DSM time +series. These are caused by the truncation of the values at zero. + +The sum of the individual time series is a more accurate value than the +aggregated time series used so far and should replace it in the future. Since +the deviations are relatively small, a tolerance is currently accepted in the +sanity checks. See [#1120](https://github.com/openego/eGon-data/issues/1120) +for updates. +""" from sqlalchemy import ARRAY, Column, Float, Integer, String from sqlalchemy.ext.declarative import declarative_base import geopandas as gpd