Feature/example solutions #16

Merged · 4 commits · Nov 3, 2023

16 changes: 16 additions & 0 deletions README.md
@@ -97,6 +97,22 @@ The length of the forecast can be defined by setting the parameter `num_forecast

The episode ends when `max_timesteps` of the `Environment()` is reached.

## Example Solutions

The folder [example_solutions](example_solutions) contains three example solutions to the problem
described above:

1. Deep reinforcement learning, using the framework [stable-baselines3](https://github.com/DLR-RM/stable-baselines3).
2. Formulating the problem as an optimal control problem (OCP) and solving it with [pyomo](http://www.pyomo.org/). In this case,
   the forecast of the price, load, and generation data is assumed to be available for the whole period.
3. Model predictive control (MPC), which re-solves the optimal control problem formulation from 2. at each time step in a
   closed-loop manner. In contrast to 2., only a forecast of fixed length is available in each iteration (see the sketch below).
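
For illustration, here is a minimal sketch of how the OCP from 2. could be formulated in pyomo. It is a hypothetical simplification (a precomputed residual load array `res_load`, no battery efficiency losses, non-negative prices), not the code from [example_solutions](example_solutions):

```python
import numpy as np
import pyomo.environ as pyo


def solve_ocp(res_load, price, capacity=400.0, power=100.0, soc_start=0.0):
    # Minimize electricity cost over the whole horizon, assuming a perfect
    # forecast of the residual load (load minus generation) and the price.
    T = len(res_load)
    m = pyo.ConcreteModel()
    m.t = pyo.RangeSet(0, T - 1)
    # Battery charging (+) / discharging (-) power per time step
    m.p = pyo.Var(m.t, bounds=(-power, power))
    # State of charge, limited by the battery capacity
    m.soc = pyo.Var(m.t, bounds=(0, capacity))
    # Electricity drawn from the grid (no feed-in remuneration assumed)
    m.grid = pyo.Var(m.t, within=pyo.NonNegativeReals)

    def soc_balance(m, t):
        prev = soc_start if t == 0 else m.soc[t - 1]
        return m.soc[t] == prev + m.p[t]

    m.soc_balance = pyo.Constraint(m.t, rule=soc_balance)

    def grid_balance(m, t):
        # Grid consumption covers the residual load plus battery charging
        return m.grid[t] >= res_load[t] + m.p[t]

    m.grid_balance = pyo.Constraint(m.t, rule=grid_balance)

    m.cost = pyo.Objective(expr=sum(m.grid[t] * price[t] for t in m.t),
                           sense=pyo.minimize)
    pyo.SolverFactory('ipopt').solve(m)
    return np.array([pyo.value(m.p[t]) for t in m.t])
```

The MPC solution from 3. would call such a `solve_ocp` at every time step on a fixed-length forecast window and apply only the first resulting action before re-solving.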

Note that running the example solutions requires additional dependencies that are not specified in `setup.py`.
Therefore, make sure to install the Python packages listed in `requirements.txt`. Additionally, an installation
of the `ipopt` solver is required to solve the optimal control problem
(with conda, simply run `conda install -c conda-forge ipopt`).

## Code Documentation

The documentation is available at [https://building-energy-storage-simulation.readthedocs.io/](https://building-energy-storage-simulation.readthedocs.io/en/master/)
279 changes: 279 additions & 0 deletions example_solutions/deep_reinforcement_learning.ipynb
@@ -0,0 +1,279 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"import gymnasium\n",
"import os\n",
"import pandas as pd\n",
"import numpy as np\n",
"from matplotlib import pyplot as plt\n",
"\n",
"from stable_baselines3 import PPO, SAC\n",
"from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize\n",
"from stable_baselines3.common.monitor import Monitor\n",
"\n",
"from building_energy_storage_simulation import BuildingSimulation, Environment\n",
"\n",
"from observation_wrapper import ObservationWrapper\n",
"from helper import read_data, TEST_INDEX_START, TEST_INDEX_END, BATTERY_CAPACITY, BATTERY_POWER"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Applying Reiforcement Learning Using Stable Baselines 3\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"NUM_FORECAST_STEPS = 8\n",
"RESULT_PATH = 'rl_example/'\n",
"\n",
"os.makedirs(RESULT_PATH, exist_ok=True)\n",
"\n",
"load, price, generation = read_data()\n",
"load_train = load[:TEST_INDEX_START]\n",
"price_train = price[:TEST_INDEX_START]\n",
"generation_train = generation[:TEST_INDEX_START]\n",
"\n",
"# Create Training Environment\n",
"sim = BuildingSimulation(electricity_load_profile=load_train,\n",
" solar_generation_profile=generation_train,\n",
" electricity_price=price_train,\n",
" max_battery_charge_per_timestep=BATTERY_POWER,\n",
" battery_capacity=BATTERY_CAPACITY)\n",
"\n",
"env = Environment(sim, num_forecasting_steps=NUM_FORECAST_STEPS, max_timesteps=len(load_train)-NUM_FORECAST_STEPS)\n",
"# ObservationWrapper combines forecast of load and generation to one residual load forecast\n",
"env = ObservationWrapper(env, NUM_FORECAST_STEPS)\n",
"initial_obs, info = env.reset()\n",
"print(initial_obs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Wrap with Monitor() so a log of the training is saved \n",
"env = Monitor(env, filename=RESULT_PATH)\n",
"# Warp with DummyVecEnc() so the observations and reward can be normalized using VecNormalize()\n",
"env = DummyVecEnv([lambda: env])\n",
"env = VecNormalize(env, norm_obs=True, norm_reward=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Train :-)\n",
"model = SAC(\"MlpPolicy\", env, verbose=1, gamma=0.95)\n",
"model.learn(total_timesteps=200000)\n",
"# Store the trained Model and environment stats (which are needed as we are standardizing the observations and reward using VecNormalize())\n",
"model.save(RESULT_PATH + 'model')\n",
"env.save(RESULT_PATH + 'env.pkl')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Evaluation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Plot the training process\n",
"training_log = pd.read_csv(RESULT_PATH + 'monitor.csv', skiprows=1)\n",
"training_log['r'].plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"load, price, generation = read_data()\n",
"load_eval = load[TEST_INDEX_START:]\n",
"price_eval = price[TEST_INDEX_START:]\n",
"generation_eval = generation[TEST_INDEX_START:]\n",
"\n",
"num_eval_timesteps = TEST_INDEX_END - TEST_INDEX_START\n",
"\n",
"eval_sim = BuildingSimulation(electricity_load_profile=load_eval,\n",
" solar_generation_profile=generation_eval,\n",
" electricity_price=price_eval,\n",
" max_battery_charge_per_timestep=BATTERY_POWER, \n",
" battery_capacity=BATTERY_CAPACITY)\n",
"\n",
"eval_env = Environment(eval_sim, num_forecasting_steps=NUM_FORECAST_STEPS, max_timesteps=num_eval_timesteps)\n",
"eval_env = ObservationWrapper(eval_env, NUM_FORECAST_STEPS)\n",
"eval_env = DummyVecEnv([lambda: eval_env])\n",
"# It is important to load the environmental statistics here as we use a rolling mean calculation !\n",
"eval_env = VecNormalize.load(RESULT_PATH + 'env.pkl', eval_env) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"eval_env.training = False\n",
"\n",
"actions, observations, electricity_consumption, price, rewards = ([], [], [], [], [])\n",
"done = False\n",
"obs = eval_env.reset()\n",
"while not done:\n",
" action = model.predict(obs, deterministic=True)\n",
" obs, r, done, info = eval_env.step([action[0][0]])\n",
"\n",
" actions.append(action[0][0][0])\n",
" original_reward = eval_env.get_original_reward()[0]\n",
" original_obs = eval_env.get_original_obs()[0]\n",
" observations.append(original_obs)\n",
" electricity_consumption.append(info[0]['electricity_consumption'])\n",
" price.append(info[0]['electricity_price'])\n",
" rewards.append(r)\n",
" \n",
"trajectory = pd.DataFrame({\n",
" 'action': actions,\n",
" 'observations': observations,\n",
" 'electricity_consumption': electricity_consumption,\n",
" 'electricity_price': price,\n",
" 'reward': rewards\n",
" }) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plot_data = trajectory[200:500]\n",
"observation_df = plot_data['observations'].apply(pd.Series)\n",
"augmented_load = observation_df[1] + plot_data['action'] * BATTERY_POWER\n",
"plt.rcParams[\"figure.figsize\"] = (16,10)\n",
"\n",
"fig1 = plt.figure()\n",
"ax = plt.subplot()\n",
"ax.plot(observation_df[1], label='Residual Load')\n",
"ax.plot(augmented_load, label='Augmented Load')\n",
"ax.plot(plot_data['electricity_price'], '--', label='Price')\n",
"ax.plot(plot_data['action']*50, label='Battery Power')\n",
"plt.ylabel('Load and Battery Power Applied (kW) & Price (Cent per kWh)')\n",
"plt.xlabel('Time Step')\n",
"ax.legend()\n",
"ax.grid()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Compare to Baseline"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"eval_env.training = False\n",
"\n",
"cost = []\n",
"done = False\n",
"obs = eval_env.reset()\n",
"while not done:\n",
" action = model.predict(obs, deterministic=True)\n",
" obs, r, done, info = eval_env.step([action[0][0]])\n",
" cost.append(info[0]['electricity_consumption'] * info[0]['electricity_price'])\n",
"\n",
"cost = sum(cost)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"eval_env.training = False\n",
"\n",
"baseline_cost = []\n",
"done = False\n",
"obs = eval_env.reset()\n",
"while not done:\n",
" # Always taking noop as action. This is the electricity demand if there would be no battery\n",
" action = [0]\n",
" obs, r, done, info = eval_env.step(action)\n",
" baseline_cost.append(info[0]['electricity_consumption'] * info[0]['electricity_price'])\n",
"\n",
"baseline_cost = sum(baseline_cost)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# how much energy did we save by utilizing the battery?\n",
"1 - (cost / baseline_cost)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
22 changes: 22 additions & 0 deletions example_solutions/helper.py
@@ -0,0 +1,22 @@
import pandas as pd
import numpy as np
from typing import Tuple

# Start and end index of the data used for testing
TEST_INDEX_START = 4380
TEST_INDEX_END = 8500

BATTERY_CAPACITY = 400
BATTERY_POWER = 100


def read_data() -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
load = pd.read_csv('../building_energy_storage_simulation/data/preprocessed/electricity_load_profile.csv')[
'Load [kWh]']
price = pd.read_csv('../building_energy_storage_simulation/data/preprocessed/electricity_price_profile.csv')[
'Day Ahead Auction']
generation = pd.read_csv('../building_energy_storage_simulation/data/preprocessed/solar_generation_profile.csv')[
'Generation [kWh]']
return np.array(load), np.array(price), np.array(generation)
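
For reference, a hypothetical usage sketch of these helpers, mirroring the train/test split used in the notebook above:

```python
# Load the full profiles and split them at the test boundary,
# as the notebook does for training and evaluation.
load, price, generation = read_data()
load_train = load[:TEST_INDEX_START]
load_test = load[TEST_INDEX_START:TEST_INDEX_END]
```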

