Feature/example solutions #16

Merged · 4 commits · Nov 3, 2023

16 changes: 16 additions & 0 deletions README.md
@@ -97,6 +97,22 @@ The length of the forecast can be defined by setting the parameter `num_forecast

The episode ends when `max_timesteps` of the `Environment()` is reached.

## Example Solutions

The folder [example_solutions](example_solutions) contains three example solutions to the problem
described above:

1. Deep reinforcement learning, using the framework [stable-baselines3](https://github.com/DLR-RM/stable-baselines3).
2. Formulating the problem as an optimal control problem (OCP) and solving it with [pyomo](http://www.pyomo.org/). In this case,
   the forecast of the price, load, and generation data is assumed to be available for the whole period.
3. Model predictive control (MPC), which re-solves the optimal control problem formulation from 2. at each time step in a
   closed-loop manner. In contrast to 2., only a forecast of fixed length is available in each iteration (see the sketch below).
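
For illustration, here is a minimal sketch of how the OCP from 2. could be formulated in pyomo. It is a hypothetical simplification (a precomputed residual load array `res_load`, no battery efficiency losses, non-negative prices), not the code from [example_solutions](example_solutions):

```python
import numpy as np
import pyomo.environ as pyo


def solve_ocp(res_load, price, capacity=400.0, power=100.0, soc_start=0.0):
    # Minimize electricity cost over the whole horizon, assuming a perfect
    # forecast of the residual load (load minus generation) and the price.
    T = len(res_load)
    m = pyo.ConcreteModel()
    m.t = pyo.RangeSet(0, T - 1)
    # Battery charging (+) / discharging (-) power per time step
    m.p = pyo.Var(m.t, bounds=(-power, power))
    # State of charge, limited by the battery capacity
    m.soc = pyo.Var(m.t, bounds=(0, capacity))
    # Electricity drawn from the grid (no feed-in remuneration assumed)
    m.grid = pyo.Var(m.t, within=pyo.NonNegativeReals)

    def soc_balance(m, t):
        prev = soc_start if t == 0 else m.soc[t - 1]
        return m.soc[t] == prev + m.p[t]

    m.soc_balance = pyo.Constraint(m.t, rule=soc_balance)

    def grid_balance(m, t):
        # Grid consumption covers the residual load plus battery charging
        return m.grid[t] >= res_load[t] + m.p[t]

    m.grid_balance = pyo.Constraint(m.t, rule=grid_balance)

    m.cost = pyo.Objective(expr=sum(m.grid[t] * price[t] for t in m.t),
                           sense=pyo.minimize)
    pyo.SolverFactory('ipopt').solve(m)
    return np.array([pyo.value(m.p[t]) for t in m.t])
```

The MPC solution from 3. would call such a `solve_ocp` at every time step on a fixed-length forecast window and apply only the first resulting action before re-solving.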

Note that running the example solutions requires additional dependencies that are not specified in `setup.py`.
Therefore, make sure to install the Python packages listed in `requirements.txt`. Additionally, an installation
of the `ipopt` solver is required to solve the optimal control problem
(with conda, simply run `conda install -c conda-forge ipopt`).

## Code Documentation

The documentation is available at [https://building-energy-storage-simulation.readthedocs.io/](https://building-energy-storage-simulation.readthedocs.io/en/master/)
279 changes: 279 additions & 0 deletions example_solutions/deep_reinforcement_learning.ipynb
@@ -0,0 +1,279 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"import gymnasium\n",
"import os\n",
"import pandas as pd\n",
"import numpy as np\n",
"from matplotlib import pyplot as plt\n",
"\n",
"from stable_baselines3 import PPO, SAC\n",
"from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize\n",
"from stable_baselines3.common.monitor import Monitor\n",
"\n",
"from building_energy_storage_simulation import BuildingSimulation, Environment\n",
"\n",
"from observation_wrapper import ObservationWrapper\n",
"from helper import read_data, TEST_INDEX_START, TEST_INDEX_END, BATTERY_CAPACITY, BATTERY_POWER"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Applying Reiforcement Learning Using Stable Baselines 3\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"NUM_FORECAST_STEPS = 8\n",
"RESULT_PATH = 'rl_example/'\n",
"\n",
"os.makedirs(RESULT_PATH, exist_ok=True)\n",
"\n",
"load, price, generation = read_data()\n",
"load_train = load[:TEST_INDEX_START]\n",
"price_train = price[:TEST_INDEX_START]\n",
"generation_train = generation[:TEST_INDEX_START]\n",
"\n",
"# Create Training Environment\n",
"sim = BuildingSimulation(electricity_load_profile=load_train,\n",
" solar_generation_profile=generation_train,\n",
" electricity_price=price_train,\n",
" max_battery_charge_per_timestep=BATTERY_POWER,\n",
" battery_capacity=BATTERY_CAPACITY)\n",
"\n",
"env = Environment(sim, num_forecasting_steps=NUM_FORECAST_STEPS, max_timesteps=len(load_train)-NUM_FORECAST_STEPS)\n",
"# ObservationWrapper combines forecast of load and generation to one residual load forecast\n",
"env = ObservationWrapper(env, NUM_FORECAST_STEPS)\n",
"initial_obs, info = env.reset()\n",
"print(initial_obs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Wrap with Monitor() so a log of the training is saved \n",
"env = Monitor(env, filename=RESULT_PATH)\n",
"# Warp with DummyVecEnc() so the observations and reward can be normalized using VecNormalize()\n",
"env = DummyVecEnv([lambda: env])\n",
"env = VecNormalize(env, norm_obs=True, norm_reward=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Train :-)\n",
"model = SAC(\"MlpPolicy\", env, verbose=1, gamma=0.95)\n",
"model.learn(total_timesteps=200000)\n",
"# Store the trained Model and environment stats (which are needed as we are standardizing the observations and reward using VecNormalize())\n",
"model.save(RESULT_PATH + 'model')\n",
"env.save(RESULT_PATH + 'env.pkl')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Evaluation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Plot the training process\n",
"training_log = pd.read_csv(RESULT_PATH + 'monitor.csv', skiprows=1)\n",
"training_log['r'].plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"load, price, generation = read_data()\n",
"load_eval = load[TEST_INDEX_START:]\n",
"price_eval = price[TEST_INDEX_START:]\n",
"generation_eval = generation[TEST_INDEX_START:]\n",
"\n",
"num_eval_timesteps = TEST_INDEX_END - TEST_INDEX_START\n",
"\n",
"eval_sim = BuildingSimulation(electricity_load_profile=load_eval,\n",
" solar_generation_profile=generation_eval,\n",
" electricity_price=price_eval,\n",
" max_battery_charge_per_timestep=BATTERY_POWER, \n",
" battery_capacity=BATTERY_CAPACITY)\n",
"\n",
"eval_env = Environment(eval_sim, num_forecasting_steps=NUM_FORECAST_STEPS, max_timesteps=num_eval_timesteps)\n",
"eval_env = ObservationWrapper(eval_env, NUM_FORECAST_STEPS)\n",
"eval_env = DummyVecEnv([lambda: eval_env])\n",
"# It is important to load the environmental statistics here as we use a rolling mean calculation !\n",
"eval_env = VecNormalize.load(RESULT_PATH + 'env.pkl', eval_env) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"eval_env.training = False\n",
"\n",
"actions, observations, electricity_consumption, price, rewards = ([], [], [], [], [])\n",
"done = False\n",
"obs = eval_env.reset()\n",
"while not done:\n",
" action = model.predict(obs, deterministic=True)\n",
" obs, r, done, info = eval_env.step([action[0][0]])\n",
"\n",
" actions.append(action[0][0][0])\n",
" original_reward = eval_env.get_original_reward()[0]\n",
" original_obs = eval_env.get_original_obs()[0]\n",
" observations.append(original_obs)\n",
" electricity_consumption.append(info[0]['electricity_consumption'])\n",
" price.append(info[0]['electricity_price'])\n",
" rewards.append(r)\n",
" \n",
"trajectory = pd.DataFrame({\n",
" 'action': actions,\n",
" 'observations': observations,\n",
" 'electricity_consumption': electricity_consumption,\n",
" 'electricity_price': price,\n",
" 'reward': rewards\n",
" }) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plot_data = trajectory[200:500]\n",
"observation_df = plot_data['observations'].apply(pd.Series)\n",
"augmented_load = observation_df[1] + plot_data['action'] * BATTERY_POWER\n",
"plt.rcParams[\"figure.figsize\"] = (16,10)\n",
"\n",
"fig1 = plt.figure()\n",
"ax = plt.subplot()\n",
"ax.plot(observation_df[1], label='Residual Load')\n",
"ax.plot(augmented_load, label='Augmented Load')\n",
"ax.plot(plot_data['electricity_price'], '--', label='Price')\n",
"ax.plot(plot_data['action']*50, label='Battery Power')\n",
"plt.ylabel('Load and Battery Power Applied (kW) & Price (Cent per kWh)')\n",
"plt.xlabel('Time Step')\n",
"ax.legend()\n",
"ax.grid()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Compare to Baseline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"eval_env.training = False\n",
"\n",
"cost = []\n",
"done = False\n",
"obs = eval_env.reset()\n",
"while not done:\n",
" action = model.predict(obs, deterministic=True)\n",
" obs, r, done, info = eval_env.step([action[0][0]])\n",
" cost.append(info[0]['electricity_consumption'] * info[0]['electricity_price'])\n",
"\n",
"cost = sum(cost)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"eval_env.training = False\n",
"\n",
"baseline_cost = []\n",
"done = False\n",
"obs = eval_env.reset()\n",
"while not done:\n",
" # Always taking noop as action. This is the electricity demand if there would be no battery\n",
" action = [0]\n",
" obs, r, done, info = eval_env.step(action)\n",
" baseline_cost.append(info[0]['electricity_consumption'] * info[0]['electricity_price'])\n",
"\n",
"baseline_cost = sum(baseline_cost)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# how much energy did we save by utilizing the battery?\n",
"1 - (cost / baseline_cost)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
22 changes: 22 additions & 0 deletions example_solutions/helper.py
@@ -0,0 +1,22 @@
import pandas as pd
import numpy as np
from typing import Tuple

# Start and end index of the data used for testing
TEST_INDEX_START = 4380
TEST_INDEX_END = 8500

BATTERY_CAPACITY = 400
BATTERY_POWER = 100


def read_data() -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
load = pd.read_csv('../building_energy_storage_simulation/data/preprocessed/electricity_load_profile.csv')[
'Load [kWh]']
price = pd.read_csv('../building_energy_storage_simulation/data/preprocessed/electricity_price_profile.csv')[
'Day Ahead Auction']
generation = pd.read_csv('../building_energy_storage_simulation/data/preprocessed/solar_generation_profile.csv')[
'Generation [kWh]']
return np.array(load), np.array(price), np.array(generation)
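
For reference, a hypothetical usage sketch of these helpers, mirroring the train/test split used in the notebook above:

```python
# Load the full profiles and split them at the test boundary,
# as the notebook does for training and evaluation.
load, price, generation = read_data()
load_train = load[:TEST_INDEX_START]
load_test = load[TEST_INDEX_START:TEST_INDEX_END]
```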

