From a2794faa73f6331cc7dc41c9e3515ac81df3d3ca Mon Sep 17 00:00:00 2001
From: Kilian Helfenbein <Kilian.Helfenbein@rl-institut.de>
Date: Thu, 26 Jan 2023 16:25:17 +0100
Subject: [PATCH 1/8] fixed missing clipping for p_min_pu and p_max_pu

---
 src/egon/data/datasets/DSM_cts_ind.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/egon/data/datasets/DSM_cts_ind.py b/src/egon/data/datasets/DSM_cts_ind.py
index 06a791181..5fbc5dd72 100644
--- a/src/egon/data/datasets/DSM_cts_ind.py
+++ b/src/egon/data/datasets/DSM_cts_ind.py
@@ -526,14 +526,14 @@ def calculate_potentials(s_flex, s_util, s_inc, s_dec, delta_t, dsm):
     p_max = scheduled_load.copy()
     for index, liste in scheduled_load.items():
         lamb = lam.loc[index]
-        p_max.loc[index] = [lamb * s_inc - item for item in liste]
+        p_max.loc[index] = [max(0, lamb * s_inc - item) for item in liste]
 
     # P_min
     p_min = scheduled_load.copy()
     for index, liste in scheduled_load.items():
         lamb = lam.loc[index]
 
-        p_min.loc[index] = [-(item - lamb * s_dec) for item in liste]
+        p_min.loc[index] = [min(0, -(item - lamb * s_dec)) for item in liste]
 
     # calculation of E_max and E_min
 
@@ -959,8 +959,10 @@ def delete_dsm_entries(carrier):
 
     # buses
 
-    sql = f"""DELETE FROM {targets["bus"]["schema"]}.{targets["bus"]["table"]} b
-     WHERE (b.carrier LIKE '{carrier}');"""
+    sql = f"""
+    DELETE FROM {targets["bus"]["schema"]}.{targets["bus"]["table"]} b
+    WHERE (b.carrier LIKE '{carrier}');
+    """
     db.execute_sql(sql)
 
     # links

From ad5a7756a8df6a5e9f62cfa2a5aaa3270e585d59 Mon Sep 17 00:00:00 2001
From: Kilian Helfenbein <Kilian.Helfenbein@rl-institut.de>
Date: Thu, 26 Jan 2023 16:30:23 +0100
Subject: [PATCH 2/8] minor formatting

---
 src/egon/data/datasets/DSM_cts_ind.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/egon/data/datasets/DSM_cts_ind.py b/src/egon/data/datasets/DSM_cts_ind.py
index 5fbc5dd72..1a1b60791 100644
--- a/src/egon/data/datasets/DSM_cts_ind.py
+++ b/src/egon/data/datasets/DSM_cts_ind.py
@@ -532,7 +532,6 @@ def calculate_potentials(s_flex, s_util, s_inc, s_dec, delta_t, dsm):
     p_min = scheduled_load.copy()
     for index, liste in scheduled_load.items():
         lamb = lam.loc[index]
-
         p_min.loc[index] = [min(0, -(item - lamb * s_dec)) for item in liste]
 
     # calculation of E_max and E_min

From ad76d8cd8243c245167537b6a8f764433da3105f Mon Sep 17 00:00:00 2001
From: Kilian Helfenbein <Kilian.Helfenbein@rl-institut.de>
Date: Wed, 1 Feb 2023 09:52:49 +0100
Subject: [PATCH 3/8] adapted tests to respect small differences between
 individual and aggregated time series due to clipping at zero

---
 src/egon/data/datasets/DSM_cts_ind.py   |  2 +-
 src/egon/data/datasets/sanity_checks.py | 13 +++++++++----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/egon/data/datasets/DSM_cts_ind.py b/src/egon/data/datasets/DSM_cts_ind.py
index 1a1b60791..10cd7b8c6 100644
--- a/src/egon/data/datasets/DSM_cts_ind.py
+++ b/src/egon/data/datasets/DSM_cts_ind.py
@@ -77,7 +77,7 @@ class DsmPotential(Dataset):
     def __init__(self, dependencies):
         super().__init__(
             name="DsmPotential",
-            version="0.0.4",
+            version="0.0.5.dev",
             dependencies=dependencies,
             tasks=(dsm_cts_ind_processing),
         )
diff --git a/src/egon/data/datasets/sanity_checks.py b/src/egon/data/datasets/sanity_checks.py
index aedb9ad9c..5df0c68d3 100644
--- a/src/egon/data/datasets/sanity_checks.py
+++ b/src/egon/data/datasets/sanity_checks.py
@@ -1486,8 +1486,13 @@ def df_from_series(s: pd.Series):
             index=groups.keys(),
         ).T
 
-        assert np.isclose(p_max_df, individual_p_max_df).all()
-        assert np.isclose(p_min_df, individual_p_min_df).all()
+        # due to the fact that time series are clipped at zero (either
+        # direction) there is a little difference between the sum of the
+        # individual time series and the aggregated time series as the second
+        # is generated independent of the others. This makes atol=1e-03
+        # necessary.
+        assert np.allclose(p_max_df, individual_p_max_df, atol=1e-03)
+        assert np.allclose(p_min_df, individual_p_min_df, atol=1e-03)
 
         # e_min and e_max
         sql = f"""
@@ -1537,5 +1542,5 @@ def df_from_series(s: pd.Series):
             index=groups.keys(),
         ).T
 
-        assert np.isclose(e_max_df, individual_e_max_df).all()
-        assert np.isclose(e_min_df, individual_e_min_df).all()
+        assert np.allclose(e_max_df, individual_e_max_df)
+        assert np.allclose(e_min_df, individual_e_min_df)

From ab233eac4a5f7e668fb59bf6a0afa5f484fc91b9 Mon Sep 17 00:00:00 2001
From: Kilian Helfenbein <Kilian.Helfenbein@rl-institut.de>
Date: Wed, 1 Feb 2023 17:35:48 +0100
Subject: [PATCH 4/8] saving correct p_set time series; saving absolute time
 series instead of relative

---
 src/egon/data/datasets/DSM_cts_ind.py | 105 ++++++++------------------
 1 file changed, 30 insertions(+), 75 deletions(-)

diff --git a/src/egon/data/datasets/DSM_cts_ind.py b/src/egon/data/datasets/DSM_cts_ind.py
index 10cd7b8c6..f91cb8d2a 100644
--- a/src/egon/data/datasets/DSM_cts_ind.py
+++ b/src/egon/data/datasets/DSM_cts_ind.py
@@ -94,13 +94,11 @@ class EgonEtragoElectricityCtsDsmTimeseries(Base):
 
     bus = Column(Integer, primary_key=True, index=True)
     scn_name = Column(String, primary_key=True, index=True)
-    p_nom = Column(Float)
-    e_nom = Column(Float)
     p_set = Column(ARRAY(Float))
-    p_max_pu = Column(ARRAY(Float))
-    p_min_pu = Column(ARRAY(Float))
-    e_max_pu = Column(ARRAY(Float))
-    e_min_pu = Column(ARRAY(Float))
+    p_max = Column(ARRAY(Float))
+    p_min = Column(ARRAY(Float))
+    e_max = Column(ARRAY(Float))
+    e_min = Column(ARRAY(Float))
 
 
 class EgonOsmIndLoadCurvesIndividualDsmTimeseries(Base):
@@ -114,13 +112,11 @@ class EgonOsmIndLoadCurvesIndividualDsmTimeseries(Base):
     osm_id = Column(Integer, primary_key=True, index=True)
     scn_name = Column(String, primary_key=True, index=True)
     bus = Column(Integer)
-    p_nom = Column(Float)
-    e_nom = Column(Float)
     p_set = Column(ARRAY(Float))
-    p_max_pu = Column(ARRAY(Float))
-    p_min_pu = Column(ARRAY(Float))
-    e_max_pu = Column(ARRAY(Float))
-    e_min_pu = Column(ARRAY(Float))
+    p_max = Column(ARRAY(Float))
+    p_min = Column(ARRAY(Float))
+    e_max = Column(ARRAY(Float))
+    e_min = Column(ARRAY(Float))
 
 
 class EgonDemandregioSitesIndElectricityDsmTimeseries(Base):
@@ -135,13 +131,11 @@ class EgonDemandregioSitesIndElectricityDsmTimeseries(Base):
     scn_name = Column(String, primary_key=True, index=True)
     bus = Column(Integer)
     application = Column(String)
-    p_nom = Column(Float)
-    e_nom = Column(Float)
     p_set = Column(ARRAY(Float))
-    p_max_pu = Column(ARRAY(Float))
-    p_min_pu = Column(ARRAY(Float))
-    e_max_pu = Column(ARRAY(Float))
-    e_min_pu = Column(ARRAY(Float))
+    p_max = Column(ARRAY(Float))
+    p_min = Column(ARRAY(Float))
+    e_max = Column(ARRAY(Float))
+    e_min = Column(ARRAY(Float))
 
 
 class EgonSitesIndLoadCurvesIndividualDsmTimeseries(Base):
@@ -155,13 +149,11 @@ class EgonSitesIndLoadCurvesIndividualDsmTimeseries(Base):
     site_id = Column(Integer, primary_key=True, index=True)
     scn_name = Column(String, primary_key=True, index=True)
     bus = Column(Integer)
-    p_nom = Column(Float)
-    e_nom = Column(Float)
     p_set = Column(ARRAY(Float))
-    p_max_pu = Column(ARRAY(Float))
-    p_min_pu = Column(ARRAY(Float))
-    e_max_pu = Column(ARRAY(Float))
-    e_min_pu = Column(ARRAY(Float))
+    p_max = Column(ARRAY(Float))
+    p_min = Column(ARRAY(Float))
+    e_max = Column(ARRAY(Float))
+    e_min = Column(ARRAY(Float))
 
 
 # Code
@@ -1308,46 +1300,6 @@ def dsm_cts_ind(
     data_export(dsm_buses, dsm_links, dsm_stores, carrier="dsm")
 
 
-def get_p_nom_e_nom(df: pd.DataFrame):
-    p_nom = [
-        max(max(val), max(abs(v) for v in df.p_min_pu.at[idx]))
-        for idx, val in df.p_max_pu.items()
-    ]
-
-    e_nom = [
-        max(max(val), max(abs(v) for v in df.e_min_pu.at[idx]))
-        for idx, val in df.e_max_pu.items()
-    ]
-
-    return df.assign(p_nom=p_nom, e_nom=e_nom)
-
-
-def calc_per_unit(df):
-    df = get_p_nom_e_nom(df)
-
-    for col in ["p_max_pu", "p_min_pu"]:
-        rslt = []
-
-        for idx, lst in df[col].items():
-            p_nom = df.p_nom.at[idx]
-
-            rslt.append([v / p_nom for v in lst])
-
-        df[col] = rslt
-
-    for col in ["e_max_pu", "e_min_pu"]:
-        rslt = []
-
-        for idx, lst in df[col].items():
-            e_nom = df.e_nom.at[idx]
-
-            rslt.append([v / e_nom for v in lst])
-
-        df[col] = rslt
-
-    return df
-
-
 def create_table(df, table, engine=CON):
     """Create table"""
     table.__table__.drop(bind=engine, checkfirst=True)
@@ -1362,6 +1314,10 @@ def create_table(df, table, engine=CON):
     )
 
 
+def div_list(lst: list, div: float):
+    return [v / div for v in lst]
+
+
 def dsm_cts_ind_individual(
     cts_cool_vent_ac_share=CTS_COOL_VENT_AC_SHARE,
     ind_vent_cool_share=IND_VENT_COOL_SHARE,
@@ -1411,19 +1367,22 @@ def dsm_cts_ind_individual(
         dsm=dsm,
     )
 
+    dsm = dsm.assign(
+        p_set=dsm.p_set.apply(div_list, div=cts_cool_vent_ac_share)
+    )
+
     base_columns = [
         "bus",
         "scn_name",
         "p_set",
-        "p_max_pu",
-        "p_min_pu",
-        "e_max_pu",
-        "e_min_pu",
+        "p_max",
+        "p_min",
+        "e_max",
+        "e_min",
     ]
 
     cts_df = pd.concat([dsm, *vals], axis=1, ignore_index=True)
     cts_df.columns = base_columns
-    cts_df = calc_per_unit(cts_df)
 
     print(" ")
     print("industry per osm-area: cooling and ventilation")
@@ -1442,11 +1401,12 @@ def dsm_cts_ind_individual(
         dsm=dsm,
     )
 
+    dsm = dsm.assign(p_set=dsm.p_set.apply(div_list, div=ind_vent_cool_share))
+
     columns = ["osm_id"] + base_columns
 
     osm_df = pd.concat([dsm, *vals], axis=1, ignore_index=True)
     osm_df.columns = columns
-    osm_df = calc_per_unit(osm_df)
 
     # industry sites
 
@@ -1486,7 +1446,6 @@ def dsm_cts_ind_individual(
 
     paper_df = pd.concat([dsm_paper, *vals], axis=1, ignore_index=True)
     paper_df.columns = columns
-    paper_df = calc_per_unit(paper_df)
 
     print(" ")
     print("industry sites: recycled paper")
@@ -1511,7 +1470,6 @@ def dsm_cts_ind_individual(
         [dsm_recycled_paper, *vals], axis=1, ignore_index=True
     )
     recycled_paper_df.columns = columns
-    recycled_paper_df = calc_per_unit(recycled_paper_df)
 
     print(" ")
     print("industry sites: pulp")
@@ -1532,7 +1490,6 @@ def dsm_cts_ind_individual(
 
     pulp_df = pd.concat([dsm_pulp, *vals], axis=1, ignore_index=True)
     pulp_df.columns = columns
-    pulp_df = calc_per_unit(pulp_df)
 
     # industry sites: cement
 
@@ -1555,7 +1512,6 @@ def dsm_cts_ind_individual(
 
     cement_df = pd.concat([dsm_cement, *vals], axis=1, ignore_index=True)
     cement_df.columns = columns
-    cement_df = calc_per_unit(cement_df)
 
     ind_df = pd.concat(
         [paper_df, recycled_paper_df, pulp_df, cement_df], ignore_index=True
@@ -1590,7 +1546,6 @@ def dsm_cts_ind_individual(
 
     ind_sites_df = pd.concat([dsm, *vals], axis=1, ignore_index=True)
     ind_sites_df.columns = columns
-    ind_sites_df = calc_per_unit(ind_sites_df)
 
     # create tables
     create_table(

From 6b3a5391720105a34d175b2f487a7a78c0c7bd02 Mon Sep 17 00:00:00 2001
From: Kilian Helfenbein <Kilian.Helfenbein@rl-institut.de>
Date: Thu, 2 Feb 2023 14:59:59 +0100
Subject: [PATCH 5/8] adapted sanity checks to new structure

---
 src/egon/data/datasets/sanity_checks.py | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/src/egon/data/datasets/sanity_checks.py b/src/egon/data/datasets/sanity_checks.py
index 5df0c68d3..e7ff05850 100644
--- a/src/egon/data/datasets/sanity_checks.py
+++ b/src/egon/data/datasets/sanity_checks.py
@@ -1453,7 +1453,7 @@ def df_from_series(s: pd.Series):
         for table in tables:
             target = targets[table]
             sql = f"""
-            SELECT bus, p_nom, e_nom, p_min_pu, p_max_pu, e_max_pu, e_min_pu
+            SELECT bus, p_min, p_max, e_max, e_min
             FROM {target["schema"]}.{target["table"]}
             WHERE scn_name = '{scenario}'
             ORDER BY bus
@@ -1465,9 +1465,8 @@ def df_from_series(s: pd.Series):
 
         groups = individual_ts_df[["bus"]].reset_index().groupby("bus").groups
 
-        individual_p_max_df = df_from_series(individual_ts_df.p_max_pu).mul(
-            individual_ts_df.p_nom
-        )
+        individual_p_max_df = df_from_series(individual_ts_df.p_max)
+
         individual_p_max_df = pd.DataFrame(
             [
                 individual_p_max_df[idxs].sum(axis=1)
@@ -1475,9 +1474,9 @@ def df_from_series(s: pd.Series):
             ],
             index=groups.keys(),
         ).T
-        individual_p_min_df = df_from_series(individual_ts_df.p_min_pu).mul(
-            individual_ts_df.p_nom
-        )
+
+        individual_p_min_df = df_from_series(individual_ts_df.p_min)
+
         individual_p_min_df = pd.DataFrame(
             [
                 individual_p_min_df[idxs].sum(axis=1)
@@ -1521,9 +1520,8 @@ def df_from_series(s: pd.Series):
         e_max_df.columns = meta_df.bus.tolist()
         e_min_df.columns = meta_df.bus.tolist()
 
-        individual_e_max_df = df_from_series(individual_ts_df.e_max_pu).mul(
-            individual_ts_df.e_nom
-        )
+        individual_e_max_df = df_from_series(individual_ts_df.e_max)
+
         individual_e_max_df = pd.DataFrame(
             [
                 individual_e_max_df[idxs].sum(axis=1)
@@ -1531,9 +1529,8 @@ def df_from_series(s: pd.Series):
             ],
             index=groups.keys(),
         ).T
-        individual_e_min_df = df_from_series(individual_ts_df.e_min_pu).mul(
-            individual_ts_df.e_nom
-        )
+        individual_e_min_df = df_from_series(individual_ts_df.e_min)
+
         individual_e_min_df = pd.DataFrame(
             [
                 individual_e_min_df[idxs].sum(axis=1)

From 01a3949f5d2a22fcf5d58d6ee3046d8c2056ae59 Mon Sep 17 00:00:00 2001
From: Kilian Helfenbein <Kilian.Helfenbein@rl-institut.de>
Date: Thu, 2 Feb 2023 15:03:45 +0100
Subject: [PATCH 6/8] adapt dataset version and changelog

---
 CHANGELOG.rst                         | 2 ++
 src/egon/data/datasets/DSM_cts_ind.py | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 9c76a8416..da49ef845 100755
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -679,6 +679,8 @@ Bug Fixes
 * Fix URL of TYNDP scenario dataset
 * Automatically generated tasks now get unique :code:`task_id`\s.
   Fixes issue `#985`_ via PR `#986`_.
+* Fix faulty DSM time series
+  `#1088 <https://github.com/openego/eGon-data/issues/1088>`_
 
 .. _PR #692: https://github.com/openego/eGon-data/pull/692
 .. _#343: https://github.com/openego/eGon-data/issues/343
diff --git a/src/egon/data/datasets/DSM_cts_ind.py b/src/egon/data/datasets/DSM_cts_ind.py
index f91cb8d2a..ebf157fa7 100644
--- a/src/egon/data/datasets/DSM_cts_ind.py
+++ b/src/egon/data/datasets/DSM_cts_ind.py
@@ -77,7 +77,7 @@ class DsmPotential(Dataset):
     def __init__(self, dependencies):
         super().__init__(
             name="DsmPotential",
-            version="0.0.5.dev",
+            version="0.0.5",
             dependencies=dependencies,
             tasks=(dsm_cts_ind_processing),
         )

From 6340b0ee3fd0da13fd212fdb3fb4e0c2100f68cf Mon Sep 17 00:00:00 2001
From: Kilian Helfenbein <Kilian.Helfenbein@rl-institut.de>
Date: Thu, 16 Mar 2023 11:34:59 +0100
Subject: [PATCH 7/8] set higher tolerance

---
 src/egon/data/datasets/sanity_checks.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/egon/data/datasets/sanity_checks.py b/src/egon/data/datasets/sanity_checks.py
index e7ff05850..859bcec5c 100644
--- a/src/egon/data/datasets/sanity_checks.py
+++ b/src/egon/data/datasets/sanity_checks.py
@@ -1488,10 +1488,11 @@ def df_from_series(s: pd.Series):
         # due to the fact that time series are clipped at zero (either
         # direction) there is a little difference between the sum of the
         # individual time series and the aggregated time series as the second
-        # is generated independent of the others. This makes atol=1e-03
+        # is generated independently of the others. This makes atol=1e-01
         # necessary.
-        assert np.allclose(p_max_df, individual_p_max_df, atol=1e-03)
-        assert np.allclose(p_min_df, individual_p_min_df, atol=1e-03)
+        atol = 1e-01
+        assert np.allclose(p_max_df, individual_p_max_df, atol=atol)
+        assert np.allclose(p_min_df, individual_p_min_df, atol=atol)
 
         # e_min and e_max
         sql = f"""

From 5d2bf3e4403bd4252a1499fcc8bbf7599c4fd877 Mon Sep 17 00:00:00 2001
From: Kilian Helfenbein <Kilian.Helfenbein@rl-institut.de>
Date: Thu, 16 Mar 2023 11:43:02 +0100
Subject: [PATCH 8/8] add notice about differences between individual and
 aggregated time series

---
 src/egon/data/datasets/DSM_cts_ind.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/egon/data/datasets/DSM_cts_ind.py b/src/egon/data/datasets/DSM_cts_ind.py
index ebf157fa7..4990ccb9a 100644
--- a/src/egon/data/datasets/DSM_cts_ind.py
+++ b/src/egon/data/datasets/DSM_cts_ind.py
@@ -1,3 +1,13 @@
+"""
+Currently, there are differences between the aggregated and individual DSM
+time series. These are caused by the clipping of the values at zero.
+
+The sum of the individual time series is a more accurate value than the
+aggregated time series used so far and should replace it in the future. Since
+the deviations are relatively small, a tolerance is currently accepted in the
+sanity checks. See
+`#1120 <https://github.com/openego/eGon-data/issues/1120>`_ for updates.
+"""
 from sqlalchemy import ARRAY, Column, Float, Integer, String
 from sqlalchemy.ext.declarative import declarative_base
 import geopandas as gpd