Skip to content

Commit

Permalink
Merge pull request #1089 from openego/fix/#1088-dsm-time-series
Browse files Browse the repository at this point in the history
Fix DSM time series
  • Loading branch information
khelfen authored Mar 16, 2023
2 parents fcf012b + 5d2bf3e commit 80489d5
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 98 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -722,6 +722,8 @@ Bug Fixes
* Fix URL of TYNDP scenario dataset
* Automatically generated tasks now get unique :code:`task_id`\s.
Fixes issue `#985`_ via PR `#986`_.
* Fix faulty DSM time series
`#1088 <https://github.com/openego/eGon-data/issues/1088>`_
* Set upper limit on commissioning date for units from MaStR
dataset
`#1098 <https://github.com/openego/eGon-data/issues/1098>`_
Expand Down
128 changes: 47 additions & 81 deletions src/egon/data/datasets/DSM_cts_ind.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
"""
Currently, there are differences in the aggregated and individual DSM time
series. These are caused by the truncation of the values at zero.
The sum of the individual time series is a more accurate value than the
aggregated time series used so far and should replace it in the future. Since
the deviations are relatively small, a tolerance is currently accepted in the
sanity checks. See [#1120](https://github.com/openego/eGon-data/issues/1120)
for updates.
"""
from sqlalchemy import ARRAY, Column, Float, Integer, String
from sqlalchemy.ext.declarative import declarative_base
import geopandas as gpd
Expand Down Expand Up @@ -77,7 +87,7 @@ class DsmPotential(Dataset):
def __init__(self, dependencies):
super().__init__(
name="DsmPotential",
version="0.0.4",
version="0.0.5",
dependencies=dependencies,
tasks=(dsm_cts_ind_processing),
)
Expand All @@ -94,13 +104,11 @@ class EgonEtragoElectricityCtsDsmTimeseries(Base):

bus = Column(Integer, primary_key=True, index=True)
scn_name = Column(String, primary_key=True, index=True)
p_nom = Column(Float)
e_nom = Column(Float)
p_set = Column(ARRAY(Float))
p_max_pu = Column(ARRAY(Float))
p_min_pu = Column(ARRAY(Float))
e_max_pu = Column(ARRAY(Float))
e_min_pu = Column(ARRAY(Float))
p_max = Column(ARRAY(Float))
p_min = Column(ARRAY(Float))
e_max = Column(ARRAY(Float))
e_min = Column(ARRAY(Float))


class EgonOsmIndLoadCurvesIndividualDsmTimeseries(Base):
Expand All @@ -114,13 +122,11 @@ class EgonOsmIndLoadCurvesIndividualDsmTimeseries(Base):
osm_id = Column(Integer, primary_key=True, index=True)
scn_name = Column(String, primary_key=True, index=True)
bus = Column(Integer)
p_nom = Column(Float)
e_nom = Column(Float)
p_set = Column(ARRAY(Float))
p_max_pu = Column(ARRAY(Float))
p_min_pu = Column(ARRAY(Float))
e_max_pu = Column(ARRAY(Float))
e_min_pu = Column(ARRAY(Float))
p_max = Column(ARRAY(Float))
p_min = Column(ARRAY(Float))
e_max = Column(ARRAY(Float))
e_min = Column(ARRAY(Float))


class EgonDemandregioSitesIndElectricityDsmTimeseries(Base):
Expand All @@ -135,13 +141,11 @@ class EgonDemandregioSitesIndElectricityDsmTimeseries(Base):
scn_name = Column(String, primary_key=True, index=True)
bus = Column(Integer)
application = Column(String)
p_nom = Column(Float)
e_nom = Column(Float)
p_set = Column(ARRAY(Float))
p_max_pu = Column(ARRAY(Float))
p_min_pu = Column(ARRAY(Float))
e_max_pu = Column(ARRAY(Float))
e_min_pu = Column(ARRAY(Float))
p_max = Column(ARRAY(Float))
p_min = Column(ARRAY(Float))
e_max = Column(ARRAY(Float))
e_min = Column(ARRAY(Float))


class EgonSitesIndLoadCurvesIndividualDsmTimeseries(Base):
Expand All @@ -155,13 +159,11 @@ class EgonSitesIndLoadCurvesIndividualDsmTimeseries(Base):
site_id = Column(Integer, primary_key=True, index=True)
scn_name = Column(String, primary_key=True, index=True)
bus = Column(Integer)
p_nom = Column(Float)
e_nom = Column(Float)
p_set = Column(ARRAY(Float))
p_max_pu = Column(ARRAY(Float))
p_min_pu = Column(ARRAY(Float))
e_max_pu = Column(ARRAY(Float))
e_min_pu = Column(ARRAY(Float))
p_max = Column(ARRAY(Float))
p_min = Column(ARRAY(Float))
e_max = Column(ARRAY(Float))
e_min = Column(ARRAY(Float))


# Code
Expand Down Expand Up @@ -526,14 +528,13 @@ def calculate_potentials(s_flex, s_util, s_inc, s_dec, delta_t, dsm):
p_max = scheduled_load.copy()
for index, liste in scheduled_load.items():
lamb = lam.loc[index]
p_max.loc[index] = [lamb * s_inc - item for item in liste]
p_max.loc[index] = [max(0, lamb * s_inc - item) for item in liste]

# P_min
p_min = scheduled_load.copy()
for index, liste in scheduled_load.items():
lamb = lam.loc[index]

p_min.loc[index] = [-(item - lamb * s_dec) for item in liste]
p_min.loc[index] = [min(0, -(item - lamb * s_dec)) for item in liste]

# calculation of E_max and E_min

Expand Down Expand Up @@ -959,8 +960,10 @@ def delete_dsm_entries(carrier):

# buses

sql = f"""DELETE FROM {targets["bus"]["schema"]}.{targets["bus"]["table"]} b
WHERE (b.carrier LIKE '{carrier}');"""
sql = f"""
DELETE FROM {targets["bus"]["schema"]}.{targets["bus"]["table"]} b
WHERE (b.carrier LIKE '{carrier}');
"""
db.execute_sql(sql)

# links
Expand Down Expand Up @@ -1307,46 +1310,6 @@ def dsm_cts_ind(
data_export(dsm_buses, dsm_links, dsm_stores, carrier="dsm")


def get_p_nom_e_nom(df: pd.DataFrame):
    """Attach ``p_nom`` and ``e_nom`` columns derived from the pu extrema.

    For every row the nominal value is the largest magnitude occurring in
    the corresponding max/min per-unit time series (the min series is
    negative, hence the ``abs``).
    """

    def _nominal(max_series, min_series):
        # One nominal value per row: peak of the upper series vs. the
        # magnitude peak of the lower series.
        nominals = []
        for idx, upper in max_series.items():
            lower_peak = max(abs(v) for v in min_series.at[idx])
            nominals.append(max(max(upper), lower_peak))
        return nominals

    return df.assign(
        p_nom=_nominal(df.p_max_pu, df.p_min_pu),
        e_nom=_nominal(df.e_max_pu, df.e_min_pu),
    )


def calc_per_unit(df):
    """Normalise the pu time-series columns by their nominal values.

    ``p_max_pu``/``p_min_pu`` are divided row-wise by ``p_nom`` and
    ``e_max_pu``/``e_min_pu`` by ``e_nom``; both nominal columns are added
    first via ``get_p_nom_e_nom``. Returns the resulting DataFrame.
    """
    df = get_p_nom_e_nom(df)

    # Which nominal column scales which time-series column.
    nominal_by_column = {
        "p_max_pu": "p_nom",
        "p_min_pu": "p_nom",
        "e_max_pu": "e_nom",
        "e_min_pu": "e_nom",
    }

    for col, nom_col in nominal_by_column.items():
        df[col] = [
            [value / nominal for value in series]
            for series, nominal in zip(df[col], df[nom_col])
        ]

    return df


def create_table(df, table, engine=CON):
"""Create table"""
table.__table__.drop(bind=engine, checkfirst=True)
Expand All @@ -1361,6 +1324,10 @@ def create_table(df, table, engine=CON):
)


def div_list(lst: list, div: float):
    """Return a new list with every element of ``lst`` divided by ``div``."""
    quotients = []
    for element in lst:
        quotients.append(element / div)
    return quotients


def dsm_cts_ind_individual(
cts_cool_vent_ac_share=CTS_COOL_VENT_AC_SHARE,
ind_vent_cool_share=IND_VENT_COOL_SHARE,
Expand Down Expand Up @@ -1410,19 +1377,22 @@ def dsm_cts_ind_individual(
dsm=dsm,
)

dsm = dsm.assign(
p_set=dsm.p_set.apply(div_list, div=cts_cool_vent_ac_share)
)

base_columns = [
"bus",
"scn_name",
"p_set",
"p_max_pu",
"p_min_pu",
"e_max_pu",
"e_min_pu",
"p_max",
"p_min",
"e_max",
"e_min",
]

cts_df = pd.concat([dsm, *vals], axis=1, ignore_index=True)
cts_df.columns = base_columns
cts_df = calc_per_unit(cts_df)

print(" ")
print("industry per osm-area: cooling and ventilation")
Expand All @@ -1441,11 +1411,12 @@ def dsm_cts_ind_individual(
dsm=dsm,
)

dsm = dsm.assign(p_set=dsm.p_set.apply(div_list, div=ind_vent_cool_share))

columns = ["osm_id"] + base_columns

osm_df = pd.concat([dsm, *vals], axis=1, ignore_index=True)
osm_df.columns = columns
osm_df = calc_per_unit(osm_df)

# industry sites

Expand Down Expand Up @@ -1485,7 +1456,6 @@ def dsm_cts_ind_individual(

paper_df = pd.concat([dsm_paper, *vals], axis=1, ignore_index=True)
paper_df.columns = columns
paper_df = calc_per_unit(paper_df)

print(" ")
print("industry sites: recycled paper")
Expand All @@ -1510,7 +1480,6 @@ def dsm_cts_ind_individual(
[dsm_recycled_paper, *vals], axis=1, ignore_index=True
)
recycled_paper_df.columns = columns
recycled_paper_df = calc_per_unit(recycled_paper_df)

print(" ")
print("industry sites: pulp")
Expand All @@ -1531,7 +1500,6 @@ def dsm_cts_ind_individual(

pulp_df = pd.concat([dsm_pulp, *vals], axis=1, ignore_index=True)
pulp_df.columns = columns
pulp_df = calc_per_unit(pulp_df)

# industry sites: cement

Expand All @@ -1554,7 +1522,6 @@ def dsm_cts_ind_individual(

cement_df = pd.concat([dsm_cement, *vals], axis=1, ignore_index=True)
cement_df.columns = columns
cement_df = calc_per_unit(cement_df)

ind_df = pd.concat(
[paper_df, recycled_paper_df, pulp_df, cement_df], ignore_index=True
Expand Down Expand Up @@ -1589,7 +1556,6 @@ def dsm_cts_ind_individual(

ind_sites_df = pd.concat([dsm, *vals], axis=1, ignore_index=True)
ind_sites_df.columns = columns
ind_sites_df = calc_per_unit(ind_sites_df)

# create tables
create_table(
Expand Down
37 changes: 20 additions & 17 deletions src/egon/data/datasets/sanity_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1453,7 +1453,7 @@ def df_from_series(s: pd.Series):
for table in tables:
target = targets[table]
sql = f"""
SELECT bus, p_nom, e_nom, p_min_pu, p_max_pu, e_max_pu, e_min_pu
SELECT bus, p_min, p_max, e_max, e_min
FROM {target["schema"]}.{target["table"]}
WHERE scn_name = '{scenario}'
ORDER BY bus
Expand All @@ -1465,19 +1465,18 @@ def df_from_series(s: pd.Series):

groups = individual_ts_df[["bus"]].reset_index().groupby("bus").groups

individual_p_max_df = df_from_series(individual_ts_df.p_max_pu).mul(
individual_ts_df.p_nom
)
individual_p_max_df = df_from_series(individual_ts_df.p_max)

individual_p_max_df = pd.DataFrame(
[
individual_p_max_df[idxs].sum(axis=1)
for idxs in groups.values()
],
index=groups.keys(),
).T
individual_p_min_df = df_from_series(individual_ts_df.p_min_pu).mul(
individual_ts_df.p_nom
)

individual_p_min_df = df_from_series(individual_ts_df.p_min)

individual_p_min_df = pd.DataFrame(
[
individual_p_min_df[idxs].sum(axis=1)
Expand All @@ -1486,8 +1485,14 @@ def df_from_series(s: pd.Series):
index=groups.keys(),
).T

assert np.isclose(p_max_df, individual_p_max_df).all()
assert np.isclose(p_min_df, individual_p_min_df).all()
# due to the fact that time series are clipped at zero (either
# direction) there is a little difference between the sum of the
# individual time series and the aggregated time series as the second
# is generated independent of the others. This makes atol=1e-01
# necessary.
atol = 1e-01
assert np.allclose(p_max_df, individual_p_max_df, atol=atol)
assert np.allclose(p_min_df, individual_p_min_df, atol=atol)

# e_min and e_max
sql = f"""
Expand Down Expand Up @@ -1516,19 +1521,17 @@ def df_from_series(s: pd.Series):
e_max_df.columns = meta_df.bus.tolist()
e_min_df.columns = meta_df.bus.tolist()

individual_e_max_df = df_from_series(individual_ts_df.e_max_pu).mul(
individual_ts_df.e_nom
)
individual_e_max_df = df_from_series(individual_ts_df.e_max)

individual_e_max_df = pd.DataFrame(
[
individual_e_max_df[idxs].sum(axis=1)
for idxs in groups.values()
],
index=groups.keys(),
).T
individual_e_min_df = df_from_series(individual_ts_df.e_min_pu).mul(
individual_ts_df.e_nom
)
individual_e_min_df = df_from_series(individual_ts_df.e_min)

individual_e_min_df = pd.DataFrame(
[
individual_e_min_df[idxs].sum(axis=1)
Expand All @@ -1537,5 +1540,5 @@ def df_from_series(s: pd.Series):
index=groups.keys(),
).T

assert np.isclose(e_max_df, individual_e_max_df).all()
assert np.isclose(e_min_df, individual_e_min_df).all()
assert np.allclose(e_max_df, individual_e_max_df)
assert np.allclose(e_min_df, individual_e_min_df)

0 comments on commit 80489d5

Please sign in to comment.