# Start and end index of the data slice reserved for testing
TEST_INDEX_START = 4380
TEST_INDEX_END = 8500


def read_data(data_dir=None) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Read the preprocessed load, price and solar-generation profiles.

    Args:
        data_dir: Directory containing ``electricity_load_profile.csv``,
            ``electricity_price_profile.csv`` and
            ``solar_generation_profile.csv``. When omitted, it defaults to
            ``../building_energy_storage_simulation/data/preprocessed``
            resolved relative to this module, so the call no longer depends
            on the current working directory.

    Returns:
        A tuple ``(load, price, generation)`` of NumPy arrays taken from the
        columns ``'Load [kWh]'``, ``'Day Ahead Auction'`` and
        ``'Generation [kWh]'`` respectively.
    """
    # Function-scope import: keeps the module's import block untouched.
    from pathlib import Path

    if data_dir is None:
        try:
            base = Path(__file__).resolve().parent
        except NameError:
            # ``__file__`` is undefined when this code is pasted into an
            # interactive session; fall back to the original cwd-relative
            # lookup in that case.
            base = Path.cwd()
        data_dir = base.parent / 'building_energy_storage_simulation' / 'data' / 'preprocessed'
    data_dir = Path(data_dir)

    # (file name, column) pairs, in the order of the returned tuple.
    sources = (
        ('electricity_load_profile.csv', 'Load [kWh]'),
        ('electricity_price_profile.csv', 'Day Ahead Auction'),
        ('solar_generation_profile.csv', 'Generation [kWh]'),
    )
    load, price, generation = (
        np.array(pd.read_csv(data_dir / file_name)[column])
        for file_name, column in sources
    )
    return load, price, generation
class ObservationWrapper(gymnasium.Wrapper):
    """Collapse the load and generation forecasts into one residual-load forecast.

    ``convert_observation`` slices the wrapped environment's flat observation
    as ``[soc, load forecast, generation forecast, price forecast]`` with
    ``forecast_length`` entries per forecast (assumed layout of the wrapped
    environment - confirm against its implementation). The wrapper emits
    ``[soc, load - generation, price forecast]``, which is ``forecast_length``
    entries shorter than the original observation.
    """

    def __init__(self, env, forecast_length):
        """Wrap ``env`` and shrink the advertised observation space.

        Args:
            env: Environment to wrap.
            forecast_length: Number of entries in each forecast segment.
        """
        super().__init__(env)

        self.forecast_length = forecast_length
        original_observation_space_length = self.observation_space.shape[0]
        # Two forecast segments are merged into one, so the observation
        # shrinks by exactly one segment.
        self.observation_space = gymnasium.spaces.Box(
            shape=(original_observation_space_length - forecast_length,),
            low=-np.inf,
            high=np.inf,
            dtype=np.float32,
        )

    def reset(self, seed=None, options=None):
        """Reset the wrapped environment and convert its first observation.

        ``seed`` and ``options`` are forwarded to the wrapped environment.
        Previously both were silently dropped, so a caller-supplied seed had
        no effect. The default seed is ``None`` so that an argument-less
        ``reset()`` behaves as before (no re-seeding) instead of re-seeding
        the environment with a constant on every episode.

        Args:
            seed: Optional seed passed through to the wrapped environment.
            options: Optional reset options passed through unchanged.

        Returns:
            Tuple ``(observation, info)`` with the converted observation.
        """
        obs, info = self.env.reset(seed=seed, options=options)
        return self.convert_observation(obs), info

    def step(self, action):
        """Step the wrapped environment and convert the resulting observation.

        Returns:
            Tuple ``(observation, reward, done, trunc, info)``; everything but
            the observation is passed through untouched.
        """
        obs, reward, done, trunc, info = self.env.step(action)
        return self.convert_observation(obs), reward, done, trunc, info

    def convert_observation(self, obs):
        """Return ``[soc, load - generation, price forecast]`` built from ``obs``.

        Args:
            obs: Flat observation; index 0 is read as the state of charge,
                followed by three consecutive segments of
                ``forecast_length`` entries each.

        Returns:
            One-dimensional NumPy array with ``1 + 2 * forecast_length``
            entries when ``obs`` holds all three segments.
        """
        n = self.forecast_length
        soc = obs[0]
        load_forecast = obs[1: n + 1]
        generation_forecast = obs[n + 1: 2 * n + 1]
        price_forecast = obs[2 * n + 1: 3 * n + 1]
        # Residual load = demand not covered by local generation.
        residual_load_forecast = load_forecast - generation_forecast
        return np.concatenate(([soc], residual_load_forecast, price_forecast), axis=0)
TEST_INDEX_START, TEST_INDEX_END" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Applying Reinforcement Learning Using Stable Baselines 3\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 0. 9.89 9.08 8.22 8.57 8.93 9.2 10.71\n", + " 3.98309 5.005 4.133 4.322 4.546 3.767 3.97 4.059\n", + " 4.326 ]\n" + ] + } + ], + "source": [ + "NUM_FORECAST_STEPS = 8\n", + "RESULT_PATH = 'rl_example/'\n", + "\n", + "os.makedirs(RESULT_PATH, exist_ok=True)\n", + "\n", + "load, price, generation = read_data()\n", + "load_train = load[:TEST_INDEX_START]\n", + "price_train = price[:TEST_INDEX_START]\n", + "generation_train = generation[:TEST_INDEX_START]\n", + "\n", + "# Create Training Environment\n", + "sim = BuildingSimulation(electricity_load_profile=load_train,\n", + " solar_generation_profile=generation_train,\n", + " electricity_price=price_train,\n", + " max_battery_charge_per_timestep=100,\n", + " battery_capacity=400)\n", + "\n", + "env = Environment(sim, num_forecasting_steps=NUM_FORECAST_STEPS, max_timesteps=len(load_train)-NUM_FORECAST_STEPS)\n", + "# ObservationWrapper combines forecast of load and generation to one residual load forecast\n", + "env = ObservationWrapper(env, NUM_FORECAST_STEPS)\n", + "initial_obs, info = env.reset()\n", + "print(initial_obs)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Wrap with Monitor() so a log of the training is saved \n", + "env = Monitor(env, filename=RESULT_PATH)\n", + "# Wrap with DummyVecEnv() so the observations and reward can be normalized using VecNormalize()\n", + "env = DummyVecEnv([lambda: env])\n", + "env = VecNormalize(env, norm_obs=True, norm_reward=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [
+ "Using cpu device\n", + "-----------------------------\n", + "| time/ | |\n", + "| fps | 1846 |\n", + "| iterations | 1 |\n", + "| time_elapsed | 1 |\n", + "| total_timesteps | 2048 |\n", + "-----------------------------\n", + "------------------------------------------\n", + "| time/ | |\n", + "| fps | 1387 |\n", + "| iterations | 2 |\n", + "| time_elapsed | 2 |\n", + "| total_timesteps | 4096 |\n", + "| train/ | |\n", + "| approx_kl | 0.0036777142 |\n", + "| clip_fraction | 0.0229 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | -0.708 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 0.000247 |\n", + "| n_updates | 10 |\n", + "| policy_gradient_loss | -0.00348 |\n", + "| std | 0.995 |\n", + "| value_loss | 0.234 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -5.1e+06 |\n", + "| time/ | |\n", + "| fps | 1261 |\n", + "| iterations | 3 |\n", + "| time_elapsed | 4 |\n", + "| total_timesteps | 6144 |\n", + "| train/ | |\n", + "| approx_kl | 0.0045435634 |\n", + "| clip_fraction | 0.0198 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.4 |\n", + "| explained_variance | 0.323 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 0.00283 |\n", + "| n_updates | 20 |\n", + "| policy_gradient_loss | -0.00426 |\n", + "| std | 0.97 |\n", + "| value_loss | 0.0201 |\n", + "------------------------------------------\n", + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -5.1e+06 |\n", + "| time/ | |\n", + "| fps | 1205 |\n", + "| iterations | 4 |\n", + "| time_elapsed | 6 |\n", + "| total_timesteps | 8192 |\n", + "| train/ | |\n", + "| approx_kl | 0.004573004 |\n", + "| clip_fraction | 0.0329 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.4 |\n", + "| explained_variance | 0.611 |\n", + "| learning_rate | 0.0003 
|\n", + "| loss | 0.00737 |\n", + "| n_updates | 30 |\n", + "| policy_gradient_loss | -0.00455 |\n", + "| std | 0.984 |\n", + "| value_loss | 0.0243 |\n", + "-----------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -5e+06 |\n", + "| time/ | |\n", + "| fps | 1169 |\n", + "| iterations | 5 |\n", + "| time_elapsed | 8 |\n", + "| total_timesteps | 10240 |\n", + "| train/ | |\n", + "| approx_kl | 0.0037469426 |\n", + "| clip_fraction | 0.0484 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.41 |\n", + "| explained_variance | 0.57 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0261 |\n", + "| n_updates | 40 |\n", + "| policy_gradient_loss | -0.00724 |\n", + "| std | 0.99 |\n", + "| value_loss | 0.00943 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -5e+06 |\n", + "| time/ | |\n", + "| fps | 1148 |\n", + "| iterations | 6 |\n", + "| time_elapsed | 10 |\n", + "| total_timesteps | 12288 |\n", + "| train/ | |\n", + "| approx_kl | 0.0058725784 |\n", + "| clip_fraction | 0.0702 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0.751 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0108 |\n", + "| n_updates | 50 |\n", + "| policy_gradient_loss | -0.00806 |\n", + "| std | 1 |\n", + "| value_loss | 0.0147 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.93e+06 |\n", + "| time/ | |\n", + "| fps | 1139 |\n", + "| iterations | 7 |\n", + "| time_elapsed | 12 |\n", + "| total_timesteps | 14336 |\n", + "| train/ | |\n", + "| approx_kl | 0.0058474382 |\n", + "| clip_fraction | 0.0539 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", 
+ "| explained_variance | 0.635 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.00986 |\n", + "| n_updates | 60 |\n", + "| policy_gradient_loss | -0.00606 |\n", + "| std | 0.999 |\n", + "| value_loss | 0.00742 |\n", + "------------------------------------------\n", + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.93e+06 |\n", + "| time/ | |\n", + "| fps | 1132 |\n", + "| iterations | 8 |\n", + "| time_elapsed | 14 |\n", + "| total_timesteps | 16384 |\n", + "| train/ | |\n", + "| approx_kl | 0.003978129 |\n", + "| clip_fraction | 0.025 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.42 |\n", + "| explained_variance | 0.82 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0199 |\n", + "| n_updates | 70 |\n", + "| policy_gradient_loss | -0.00461 |\n", + "| std | 1 |\n", + "| value_loss | 0.00987 |\n", + "-----------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.89e+06 |\n", + "| time/ | |\n", + "| fps | 1127 |\n", + "| iterations | 9 |\n", + "| time_elapsed | 16 |\n", + "| total_timesteps | 18432 |\n", + "| train/ | |\n", + "| approx_kl | 0.0047321245 |\n", + "| clip_fraction | 0.0652 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.4 |\n", + "| explained_variance | 0.685 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0133 |\n", + "| n_updates | 80 |\n", + "| policy_gradient_loss | -0.00815 |\n", + "| std | 0.97 |\n", + "| value_loss | 0.00682 |\n", + "------------------------------------------\n", + "----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.89e+06 |\n", + "| time/ | |\n", + "| fps | 1125 |\n", + "| iterations | 10 |\n", + "| time_elapsed | 18 |\n", + "| total_timesteps | 20480 |\n", + "| train/ | |\n", + "| approx_kl | 0.00393809 |\n", + "| clip_fraction | 
0.0249 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.38 |\n", + "| explained_variance | 0.846 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0126 |\n", + "| n_updates | 90 |\n", + "| policy_gradient_loss | -0.00404 |\n", + "| std | 0.955 |\n", + "| value_loss | 0.00849 |\n", + "----------------------------------------\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.85e+06 |\n", + "| time/ | |\n", + "| fps | 1122 |\n", + "| iterations | 11 |\n", + "| time_elapsed | 20 |\n", + "| total_timesteps | 22528 |\n", + "| train/ | |\n", + "| approx_kl | 0.0056245844 |\n", + "| clip_fraction | 0.0404 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.37 |\n", + "| explained_variance | 0.752 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 0.00556 |\n", + "| n_updates | 100 |\n", + "| policy_gradient_loss | -0.00522 |\n", + "| std | 0.941 |\n", + "| value_loss | 0.00499 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.85e+06 |\n", + "| time/ | |\n", + "| fps | 1119 |\n", + "| iterations | 12 |\n", + "| time_elapsed | 21 |\n", + "| total_timesteps | 24576 |\n", + "| train/ | |\n", + "| approx_kl | 0.0032175046 |\n", + "| clip_fraction | 0.0312 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.35 |\n", + "| explained_variance | 0.827 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.00478 |\n", + "| n_updates | 110 |\n", + "| policy_gradient_loss | -0.00691 |\n", + "| std | 0.935 |\n", + "| value_loss | 0.00943 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.83e+06 |\n", + "| time/ | |\n", + "| fps | 1110 |\n", + "| 
iterations | 13 |\n", + "| time_elapsed | 23 |\n", + "| total_timesteps | 26624 |\n", + "| train/ | |\n", + "| approx_kl | 0.0033950265 |\n", + "| clip_fraction | 0.0379 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.35 |\n", + "| explained_variance | 0.738 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 0.00281 |\n", + "| n_updates | 120 |\n", + "| policy_gradient_loss | -0.00417 |\n", + "| std | 0.928 |\n", + "| value_loss | 0.00453 |\n", + "------------------------------------------\n", + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.83e+06 |\n", + "| time/ | |\n", + "| fps | 1054 |\n", + "| iterations | 14 |\n", + "| time_elapsed | 27 |\n", + "| total_timesteps | 28672 |\n", + "| train/ | |\n", + "| approx_kl | 0.005857446 |\n", + "| clip_fraction | 0.0508 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.33 |\n", + "| explained_variance | 0.841 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.00559 |\n", + "| n_updates | 130 |\n", + "| policy_gradient_loss | -0.00754 |\n", + "| std | 0.905 |\n", + "| value_loss | 0.00871 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.79e+06 |\n", + "| time/ | |\n", + "| fps | 1010 |\n", + "| iterations | 15 |\n", + "| time_elapsed | 30 |\n", + "| total_timesteps | 30720 |\n", + "| train/ | |\n", + "| approx_kl | 0.005098461 |\n", + "| clip_fraction | 0.0383 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.31 |\n", + "| explained_variance | 0.684 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0102 |\n", + "| n_updates | 140 |\n", + "| policy_gradient_loss | -0.00649 |\n", + "| std | 0.895 |\n", + "| value_loss | 0.00368 |\n", + "-----------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 
|\n", + "| ep_rew_mean | -4.79e+06 |\n", + "| time/ | |\n", + "| fps | 996 |\n", + "| iterations | 16 |\n", + "| time_elapsed | 32 |\n", + "| total_timesteps | 32768 |\n", + "| train/ | |\n", + "| approx_kl | 0.0066045905 |\n", + "| clip_fraction | 0.0508 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.3 |\n", + "| explained_variance | 0.862 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 0.01 |\n", + "| n_updates | 150 |\n", + "| policy_gradient_loss | -0.00624 |\n", + "| std | 0.885 |\n", + "| value_loss | 0.00755 |\n", + "------------------------------------------\n", + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.79e+06 |\n", + "| time/ | |\n", + "| fps | 999 |\n", + "| iterations | 17 |\n", + "| time_elapsed | 34 |\n", + "| total_timesteps | 34816 |\n", + "| train/ | |\n", + "| approx_kl | 0.004595418 |\n", + "| clip_fraction | 0.0344 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.28 |\n", + "| explained_variance | 0.662 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 0.00958 |\n", + "| n_updates | 160 |\n", + "| policy_gradient_loss | -0.0061 |\n", + "| std | 0.859 |\n", + "| value_loss | 0.00385 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.76e+06 |\n", + "| time/ | |\n", + "| fps | 1002 |\n", + "| iterations | 18 |\n", + "| time_elapsed | 36 |\n", + "| total_timesteps | 36864 |\n", + "| train/ | |\n", + "| approx_kl | 0.008695626 |\n", + "| clip_fraction | 0.0789 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.25 |\n", + "| explained_variance | 0.84 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0146 |\n", + "| n_updates | 170 |\n", + "| policy_gradient_loss | -0.00832 |\n", + "| std | 0.835 |\n", + "| value_loss | 0.00851 |\n", + "-----------------------------------------\n", + 
"-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.76e+06 |\n", + "| time/ | |\n", + "| fps | 1004 |\n", + "| iterations | 19 |\n", + "| time_elapsed | 38 |\n", + "| total_timesteps | 38912 |\n", + "| train/ | |\n", + "| approx_kl | 0.004202239 |\n", + "| clip_fraction | 0.0506 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.23 |\n", + "| explained_variance | 0.875 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0112 |\n", + "| n_updates | 180 |\n", + "| policy_gradient_loss | -0.00643 |\n", + "| std | 0.826 |\n", + "| value_loss | 0.0049 |\n", + "-----------------------------------------\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.72e+06 |\n", + "| time/ | |\n", + "| fps | 1006 |\n", + "| iterations | 20 |\n", + "| time_elapsed | 40 |\n", + "| total_timesteps | 40960 |\n", + "| train/ | |\n", + "| approx_kl | 0.0056182286 |\n", + "| clip_fraction | 0.044 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.21 |\n", + "| explained_variance | 0.735 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0388 |\n", + "| n_updates | 190 |\n", + "| policy_gradient_loss | -0.00686 |\n", + "| std | 0.801 |\n", + "| value_loss | 0.00517 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.72e+06 |\n", + "| time/ | |\n", + "| fps | 1009 |\n", + "| iterations | 21 |\n", + "| time_elapsed | 42 |\n", + "| total_timesteps | 43008 |\n", + "| train/ | |\n", + "| approx_kl | 0.0044678794 |\n", + "| clip_fraction | 0.0564 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.2 |\n", + "| explained_variance | 0.893 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 0.00494 |\n", + "| n_updates | 200 
|\n", + "| policy_gradient_loss | -0.00672 |\n", + "| std | 0.803 |\n", + "| value_loss | 0.00637 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.68e+06 |\n", + "| time/ | |\n", + "| fps | 987 |\n", + "| iterations | 22 |\n", + "| time_elapsed | 45 |\n", + "| total_timesteps | 45056 |\n", + "| train/ | |\n", + "| approx_kl | 0.0033513391 |\n", + "| clip_fraction | 0.0302 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.19 |\n", + "| explained_variance | 0.765 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.00761 |\n", + "| n_updates | 210 |\n", + "| policy_gradient_loss | -0.0038 |\n", + "| std | 0.794 |\n", + "| value_loss | 0.00508 |\n", + "------------------------------------------\n", + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.68e+06 |\n", + "| time/ | |\n", + "| fps | 964 |\n", + "| iterations | 23 |\n", + "| time_elapsed | 48 |\n", + "| total_timesteps | 47104 |\n", + "| train/ | |\n", + "| approx_kl | 0.004656489 |\n", + "| clip_fraction | 0.0439 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.18 |\n", + "| explained_variance | 0.908 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0369 |\n", + "| n_updates | 220 |\n", + "| policy_gradient_loss | -0.00686 |\n", + "| std | 0.779 |\n", + "| value_loss | 0.00658 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.64e+06 |\n", + "| time/ | |\n", + "| fps | 962 |\n", + "| iterations | 24 |\n", + "| time_elapsed | 51 |\n", + "| total_timesteps | 49152 |\n", + "| train/ | |\n", + "| approx_kl | 0.005987567 |\n", + "| clip_fraction | 0.05 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.16 |\n", + "| explained_variance | 0.723 |\n", + 
"| learning_rate | 0.0003 |\n", + "| loss | 0.00145 |\n", + "| n_updates | 230 |\n", + "| policy_gradient_loss | -0.00726 |\n", + "| std | 0.765 |\n", + "| value_loss | 0.00448 |\n", + "-----------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.64e+06 |\n", + "| time/ | |\n", + "| fps | 966 |\n", + "| iterations | 25 |\n", + "| time_elapsed | 52 |\n", + "| total_timesteps | 51200 |\n", + "| train/ | |\n", + "| approx_kl | 0.0054580546 |\n", + "| clip_fraction | 0.043 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.15 |\n", + "| explained_variance | 0.901 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.00864 |\n", + "| n_updates | 240 |\n", + "| policy_gradient_loss | -0.00724 |\n", + "| std | 0.766 |\n", + "| value_loss | 0.00696 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.61e+06 |\n", + "| time/ | |\n", + "| fps | 970 |\n", + "| iterations | 26 |\n", + "| time_elapsed | 54 |\n", + "| total_timesteps | 53248 |\n", + "| train/ | |\n", + "| approx_kl | 0.0048291944 |\n", + "| clip_fraction | 0.0397 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.14 |\n", + "| explained_variance | 0.754 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0127 |\n", + "| n_updates | 250 |\n", + "| policy_gradient_loss | -0.0047 |\n", + "| std | 0.745 |\n", + "| value_loss | 0.00429 |\n", + "------------------------------------------\n", + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.61e+06 |\n", + "| time/ | |\n", + "| fps | 973 |\n", + "| iterations | 27 |\n", + "| time_elapsed | 56 |\n", + "| total_timesteps | 55296 |\n", + "| train/ | |\n", + "| approx_kl | 0.006914062 |\n", + "| clip_fraction | 0.0762 |\n", + "| clip_range | 
0.2 |\n", + "| entropy_loss | -1.12 |\n", + "| explained_variance | 0.898 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 0.0234 |\n", + "| n_updates | 260 |\n", + "| policy_gradient_loss | -0.00764 |\n", + "| std | 0.739 |\n", + "| value_loss | 0.00753 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.58e+06 |\n", + "| time/ | |\n", + "| fps | 976 |\n", + "| iterations | 28 |\n", + "| time_elapsed | 58 |\n", + "| total_timesteps | 57344 |\n", + "| train/ | |\n", + "| approx_kl | 0.004374048 |\n", + "| clip_fraction | 0.0495 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.11 |\n", + "| explained_variance | 0.734 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.00411 |\n", + "| n_updates | 270 |\n", + "| policy_gradient_loss | -0.00574 |\n", + "| std | 0.732 |\n", + "| value_loss | 0.00341 |\n", + "-----------------------------------------\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.58e+06 |\n", + "| time/ | |\n", + "| fps | 979 |\n", + "| iterations | 29 |\n", + "| time_elapsed | 60 |\n", + "| total_timesteps | 59392 |\n", + "| train/ | |\n", + "| approx_kl | 0.006090526 |\n", + "| clip_fraction | 0.0544 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.1 |\n", + "| explained_variance | 0.896 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0111 |\n", + "| n_updates | 280 |\n", + "| policy_gradient_loss | -0.00702 |\n", + "| std | 0.722 |\n", + "| value_loss | 0.00728 |\n", + "-----------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.55e+06 |\n", + "| time/ | |\n", + "| fps | 983 |\n", + "| iterations | 30 |\n", + "| time_elapsed | 62 
|\n", + "| total_timesteps | 61440 |\n", + "| train/ | |\n", + "| approx_kl | 0.0043111267 |\n", + "| clip_fraction | 0.0461 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.08 |\n", + "| explained_variance | 0.72 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0198 |\n", + "| n_updates | 290 |\n", + "| policy_gradient_loss | -0.00596 |\n", + "| std | 0.705 |\n", + "| value_loss | 0.00319 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.55e+06 |\n", + "| time/ | |\n", + "| fps | 979 |\n", + "| iterations | 31 |\n", + "| time_elapsed | 64 |\n", + "| total_timesteps | 63488 |\n", + "| train/ | |\n", + "| approx_kl | 0.0050121583 |\n", + "| clip_fraction | 0.0552 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.06 |\n", + "| explained_variance | 0.893 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.00695 |\n", + "| n_updates | 300 |\n", + "| policy_gradient_loss | -0.00873 |\n", + "| std | 0.696 |\n", + "| value_loss | 0.0067 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.55e+06 |\n", + "| time/ | |\n", + "| fps | 965 |\n", + "| iterations | 32 |\n", + "| time_elapsed | 67 |\n", + "| total_timesteps | 65536 |\n", + "| train/ | |\n", + "| approx_kl | 0.0067488514 |\n", + "| clip_fraction | 0.0677 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.05 |\n", + "| explained_variance | 0.653 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0191 |\n", + "| n_updates | 310 |\n", + "| policy_gradient_loss | -0.00957 |\n", + "| std | 0.687 |\n", + "| value_loss | 0.00315 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.52e+06 |\n", + "| 
time/ | |\n", + "| fps | 950 |\n", + "| iterations | 33 |\n", + "| time_elapsed | 71 |\n", + "| total_timesteps | 67584 |\n", + "| train/ | |\n", + "| approx_kl | 0.0039351527 |\n", + "| clip_fraction | 0.0503 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.04 |\n", + "| explained_variance | 0.89 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.011 |\n", + "| n_updates | 320 |\n", + "| policy_gradient_loss | -0.00724 |\n", + "| std | 0.681 |\n", + "| value_loss | 0.00592 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.52e+06 |\n", + "| time/ | |\n", + "| fps | 951 |\n", + "| iterations | 34 |\n", + "| time_elapsed | 73 |\n", + "| total_timesteps | 69632 |\n", + "| train/ | |\n", + "| approx_kl | 0.0057587875 |\n", + "| clip_fraction | 0.0638 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -1.02 |\n", + "| explained_variance | 0.652 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.00516 |\n", + "| n_updates | 330 |\n", + "| policy_gradient_loss | -0.0065 |\n", + "| std | 0.665 |\n", + "| value_loss | 0.00255 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.49e+06 |\n", + "| time/ | |\n", + "| fps | 954 |\n", + "| iterations | 35 |\n", + "| time_elapsed | 75 |\n", + "| total_timesteps | 71680 |\n", + "| train/ | |\n", + "| approx_kl | 0.0055833566 |\n", + "| clip_fraction | 0.0664 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -0.996 |\n", + "| explained_variance | 0.798 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 0.0045 |\n", + "| n_updates | 340 |\n", + "| policy_gradient_loss | -0.00726 |\n", + "| std | 0.649 |\n", + "| value_loss | 0.00423 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| 
rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.49e+06 |\n", + "| time/ | |\n", + "| fps | 958 |\n", + "| iterations | 36 |\n", + "| time_elapsed | 76 |\n", + "| total_timesteps | 73728 |\n", + "| train/ | |\n", + "| approx_kl | 0.0051649846 |\n", + "| clip_fraction | 0.0429 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -0.993 |\n", + "| explained_variance | 0.899 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0105 |\n", + "| n_updates | 350 |\n", + "| policy_gradient_loss | -0.00616 |\n", + "| std | 0.657 |\n", + "| value_loss | 0.00457 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.47e+06 |\n", + "| time/ | |\n", + "| fps | 961 |\n", + "| iterations | 37 |\n", + "| time_elapsed | 78 |\n", + "| total_timesteps | 75776 |\n", + "| train/ | |\n", + "| approx_kl | 0.0059529413 |\n", + "| clip_fraction | 0.0619 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -0.995 |\n", + "| explained_variance | 0.776 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0184 |\n", + "| n_updates | 360 |\n", + "| policy_gradient_loss | -0.00641 |\n", + "| std | 0.652 |\n", + "| value_loss | 0.00389 |\n", + "------------------------------------------\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.47e+06 |\n", + "| time/ | |\n", + "| fps | 963 |\n", + "| iterations | 38 |\n", + "| time_elapsed | 80 |\n", + "| total_timesteps | 77824 |\n", + "| train/ | |\n", + "| approx_kl | 0.005639543 |\n", + "| clip_fraction | 0.0458 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -0.996 |\n", + "| explained_variance | 0.915 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.011 |\n", + "| n_updates | 370 |\n", + "| policy_gradient_loss | -0.00759 |\n", + 
"| std | 0.655 |\n", + "| value_loss | 0.00563 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.45e+06 |\n", + "| time/ | |\n", + "| fps | 966 |\n", + "| iterations | 39 |\n", + "| time_elapsed | 82 |\n", + "| total_timesteps | 79872 |\n", + "| train/ | |\n", + "| approx_kl | 0.006792381 |\n", + "| clip_fraction | 0.0622 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -0.99 |\n", + "| explained_variance | 0.756 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0247 |\n", + "| n_updates | 380 |\n", + "| policy_gradient_loss | -0.00831 |\n", + "| std | 0.646 |\n", + "| value_loss | 0.00382 |\n", + "-----------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.45e+06 |\n", + "| time/ | |\n", + "| fps | 956 |\n", + "| iterations | 40 |\n", + "| time_elapsed | 85 |\n", + "| total_timesteps | 81920 |\n", + "| train/ | |\n", + "| approx_kl | 0.0076133907 |\n", + "| clip_fraction | 0.0587 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -0.973 |\n", + "| explained_variance | 0.924 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0182 |\n", + "| n_updates | 390 |\n", + "| policy_gradient_loss | -0.00696 |\n", + "| std | 0.636 |\n", + "| value_loss | 0.00579 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.42e+06 |\n", + "| time/ | |\n", + "| fps | 944 |\n", + "| iterations | 41 |\n", + "| time_elapsed | 88 |\n", + "| total_timesteps | 83968 |\n", + "| train/ | |\n", + "| approx_kl | 0.0061445124 |\n", + "| clip_fraction | 0.0628 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -0.964 |\n", + "| explained_variance | 0.754 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 
-0.0154 |\n", + "| n_updates | 400 |\n", + "| policy_gradient_loss | -0.00757 |\n", + "| std | 0.634 |\n", + "| value_loss | 0.00352 |\n", + "------------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.42e+06 |\n", + "| time/ | |\n", + "| fps | 933 |\n", + "| iterations | 42 |\n", + "| time_elapsed | 92 |\n", + "| total_timesteps | 86016 |\n", + "| train/ | |\n", + "| approx_kl | 0.0058118524 |\n", + "| clip_fraction | 0.0599 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -0.97 |\n", + "| explained_variance | 0.918 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.024 |\n", + "| n_updates | 410 |\n", + "| policy_gradient_loss | -0.00817 |\n", + "| std | 0.642 |\n", + "| value_loss | 0.00622 |\n", + "------------------------------------------\n", + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.4e+06 |\n", + "| time/ | |\n", + "| fps | 935 |\n", + "| iterations | 43 |\n", + "| time_elapsed | 94 |\n", + "| total_timesteps | 88064 |\n", + "| train/ | |\n", + "| approx_kl | 0.005398696 |\n", + "| clip_fraction | 0.0429 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -0.953 |\n", + "| explained_variance | 0.77 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.00122 |\n", + "| n_updates | 420 |\n", + "| policy_gradient_loss | -0.00627 |\n", + "| std | 0.618 |\n", + "| value_loss | 0.00296 |\n", + "-----------------------------------------\n", + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.4e+06 |\n", + "| time/ | |\n", + "| fps | 937 |\n", + "| iterations | 44 |\n", + "| time_elapsed | 96 |\n", + "| total_timesteps | 90112 |\n", + "| train/ | |\n", + "| approx_kl | 0.005530538 |\n", + "| clip_fraction | 0.0579 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -0.943 |\n", + "| 
explained_variance | 0.909 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0142 |\n", + "| n_updates | 430 |\n", + "| policy_gradient_loss | -0.00727 |\n", + "| std | 0.623 |\n", + "| value_loss | 0.00661 |\n", + "-----------------------------------------\n", + "------------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.39e+06 |\n", + "| time/ | |\n", + "| fps | 940 |\n", + "| iterations | 45 |\n", + "| time_elapsed | 98 |\n", + "| total_timesteps | 92160 |\n", + "| train/ | |\n", + "| approx_kl | 0.0078549655 |\n", + "| clip_fraction | 0.0686 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -0.947 |\n", + "| explained_variance | 0.765 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.0267 |\n", + "| n_updates | 440 |\n", + "| policy_gradient_loss | -0.00908 |\n", + "| std | 0.625 |\n", + "| value_loss | 0.00305 |\n", + "------------------------------------------\n", + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.39e+06 |\n", + "| time/ | |\n", + "| fps | 940 |\n", + "| iterations | 46 |\n", + "| time_elapsed | 100 |\n", + "| total_timesteps | 94208 |\n", + "| train/ | |\n", + "| approx_kl | 0.005267841 |\n", + "| clip_fraction | 0.0453 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -0.937 |\n", + "| explained_variance | 0.908 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | -0.00631 |\n", + "| n_updates | 450 |\n", + "| policy_gradient_loss | -0.00533 |\n", + "| std | 0.612 |\n", + "| value_loss | 0.00648 |\n", + "-----------------------------------------\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-----------------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 4.37e+03 |\n", + "| ep_rew_mean | -4.37e+06 |\n", + "| time/ | |\n", + "| fps | 930 |\n", + "| iterations | 47 |\n", + "| time_elapsed | 103 |\n", + "| total_timesteps | 96256 
|\n", + "| train/ | |\n", + "| approx_kl | 0.007721955 |\n", + "| clip_fraction | 0.086 |\n", + "| clip_range | 0.2 |\n", + "| entropy_loss | -0.919 |\n", + "| explained_variance | 0.753 |\n", + "| learning_rate | 0.0003 |\n", + "| loss | 0.0058 |\n", + "| n_updates | 460 |\n", + "| policy_gradient_loss | -0.00828 |\n", + "| std | 0.601 |\n", + "| value_loss | 0.00257 |\n", + "-----------------------------------------\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[4], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Train :-)\u001b[39;00m\n\u001b[1;32m 2\u001b[0m model \u001b[38;5;241m=\u001b[39m PPO(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMlpPolicy\u001b[39m\u001b[38;5;124m\"\u001b[39m, env, verbose\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m, gamma\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.95\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlearn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtotal_timesteps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m200000\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# Store the trained Model and environment stats (which are needed as we are standardizing the observations and reward using VecNormalize())\u001b[39;00m\n\u001b[1;32m 5\u001b[0m model\u001b[38;5;241m.\u001b[39msave(RESULT_PATH \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", + "File \u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/stable_baselines3/ppo/ppo.py:308\u001b[0m, in \u001b[0;36mPPO.learn\u001b[0;34m(self, total_timesteps, callback, log_interval, tb_log_name, 
reset_num_timesteps, progress_bar)\u001b[0m\n\u001b[1;32m 299\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mlearn\u001b[39m(\n\u001b[1;32m 300\u001b[0m \u001b[38;5;28mself\u001b[39m: SelfPPO,\n\u001b[1;32m 301\u001b[0m total_timesteps: \u001b[38;5;28mint\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 306\u001b[0m progress_bar: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 307\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m SelfPPO:\n\u001b[0;32m--> 308\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlearn\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 309\u001b[0m \u001b[43m \u001b[49m\u001b[43mtotal_timesteps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtotal_timesteps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 310\u001b[0m \u001b[43m \u001b[49m\u001b[43mcallback\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcallback\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 311\u001b[0m \u001b[43m \u001b[49m\u001b[43mlog_interval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlog_interval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 312\u001b[0m \u001b[43m \u001b[49m\u001b[43mtb_log_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtb_log_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 313\u001b[0m \u001b[43m \u001b[49m\u001b[43mreset_num_timesteps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreset_num_timesteps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 314\u001b[0m \u001b[43m \u001b[49m\u001b[43mprogress_bar\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprogress_bar\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 315\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/stable_baselines3/common/on_policy_algorithm.py:259\u001b[0m, in 
\u001b[0;36mOnPolicyAlgorithm.learn\u001b[0;34m(self, total_timesteps, callback, log_interval, tb_log_name, reset_num_timesteps, progress_bar)\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39menv \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 258\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnum_timesteps \u001b[38;5;241m<\u001b[39m total_timesteps:\n\u001b[0;32m--> 259\u001b[0m continue_training \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcollect_rollouts\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43menv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcallback\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrollout_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_rollout_steps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_steps\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 261\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m continue_training \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m:\n\u001b[1;32m 262\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n", + "File \u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/stable_baselines3/common/on_policy_algorithm.py:169\u001b[0m, in \u001b[0;36mOnPolicyAlgorithm.collect_rollouts\u001b[0;34m(self, env, callback, rollout_buffer, n_rollout_steps)\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m th\u001b[38;5;241m.\u001b[39mno_grad():\n\u001b[1;32m 167\u001b[0m \u001b[38;5;66;03m# Convert to pytorch tensor or to 
TensorDict\u001b[39;00m\n\u001b[1;32m 168\u001b[0m obs_tensor \u001b[38;5;241m=\u001b[39m obs_as_tensor(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_last_obs, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[0;32m--> 169\u001b[0m actions, values, log_probs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpolicy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobs_tensor\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 170\u001b[0m actions \u001b[38;5;241m=\u001b[39m actions\u001b[38;5;241m.\u001b[39mcpu()\u001b[38;5;241m.\u001b[39mnumpy()\n\u001b[1;32m 172\u001b[0m \u001b[38;5;66;03m# Rescale and perform action\u001b[39;00m\n", + "File \u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/torch/nn/modules/module.py:1518\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1516\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1517\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1518\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/torch/nn/modules/module.py:1527\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1522\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic 
in\u001b[39;00m\n\u001b[1;32m 1523\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1524\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1525\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1526\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1527\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1529\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1530\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "File \u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/stable_baselines3/common/policies.py:626\u001b[0m, in \u001b[0;36mActorCriticPolicy.forward\u001b[0;34m(self, obs, deterministic)\u001b[0m\n\u001b[1;32m 624\u001b[0m \u001b[38;5;66;03m# Evaluate the values for the given observations\u001b[39;00m\n\u001b[1;32m 625\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvalue_net(latent_vf)\n\u001b[0;32m--> 626\u001b[0m distribution \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_action_dist_from_latent\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlatent_pi\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 627\u001b[0m actions \u001b[38;5;241m=\u001b[39m distribution\u001b[38;5;241m.\u001b[39mget_actions(deterministic\u001b[38;5;241m=\u001b[39mdeterministic)\n\u001b[1;32m 628\u001b[0m log_prob \u001b[38;5;241m=\u001b[39m distribution\u001b[38;5;241m.\u001b[39mlog_prob(actions)\n", + "File \u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/stable_baselines3/common/policies.py:656\u001b[0m, in \u001b[0;36mActorCriticPolicy._get_action_dist_from_latent\u001b[0;34m(self, latent_pi)\u001b[0m\n\u001b[1;32m 653\u001b[0m mean_actions \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maction_net(latent_pi)\n\u001b[1;32m 655\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maction_dist, DiagGaussianDistribution):\n\u001b[0;32m--> 656\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43maction_dist\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mproba_distribution\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmean_actions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlog_std\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 657\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maction_dist, CategoricalDistribution):\n\u001b[1;32m 658\u001b[0m \u001b[38;5;66;03m# Here mean_actions are the logits before the softmax\u001b[39;00m\n\u001b[1;32m 659\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maction_dist\u001b[38;5;241m.\u001b[39mproba_distribution(action_logits\u001b[38;5;241m=\u001b[39mmean_actions)\n", + "File \u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/stable_baselines3/common/distributions.py:164\u001b[0m, in \u001b[0;36mDiagGaussianDistribution.proba_distribution\u001b[0;34m(self, mean_actions, log_std)\u001b[0m\n\u001b[1;32m 156\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 157\u001b[0m \u001b[38;5;124;03mCreate the distribution given its parameters (mean, std)\u001b[39;00m\n\u001b[1;32m 158\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;124;03m:return:\u001b[39;00m\n\u001b[1;32m 162\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 163\u001b[0m action_std \u001b[38;5;241m=\u001b[39m th\u001b[38;5;241m.\u001b[39mones_like(mean_actions) \u001b[38;5;241m*\u001b[39m log_std\u001b[38;5;241m.\u001b[39mexp()\n\u001b[0;32m--> 164\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdistribution \u001b[38;5;241m=\u001b[39m \u001b[43mNormal\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmean_actions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maction_std\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", + "File \u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/torch/distributions/normal.py:56\u001b[0m, in \u001b[0;36mNormal.__init__\u001b[0;34m(self, loc, scale, validate_args)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 55\u001b[0m batch_shape \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mloc\u001b[38;5;241m.\u001b[39msize()\n\u001b[0;32m---> 56\u001b[0m 
\u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mbatch_shape\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalidate_args\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvalidate_args\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/torch/distributions/distribution.py:75\u001b[0m, in \u001b[0;36mDistribution.__init__\u001b[0;34m(self, batch_shape, event_shape, validate_args)\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m valid\u001b[38;5;241m.\u001b[39mall():\n\u001b[1;32m 68\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 69\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExpected parameter \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mparam\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 70\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(value)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m of shape \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtuple\u001b[39m(value\u001b[38;5;241m.\u001b[39mshape)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut found invalid values:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mvalue\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 74\u001b[0m )\n\u001b[0;32m---> 75\u001b[0m 
\u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m()\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "# Train :-)\n", + "model = PPO(\"MlpPolicy\", env, verbose=1, gamma=0.95)\n", + "model.learn(total_timesteps=200000)\n", + "# Store the trained Model and environment stats (which are needed as we are standardizing the observations and reward using VecNormalize())\n", + "model.save(RESULT_PATH + 'model')\n", + "env.save(RESULT_PATH + 'env.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "env.save(RESULT_PATH + 'env.pkl')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Plot the training process\n", + "training_log = pd.read_csv(RESULT_PATH + 'monitor.csv', skiprows=1)\n", + "training_log['r'].plot()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "load, price, generation = read_data()\n", + "load_eval = load[TEST_INDEX_START:]\n", + "price_eval = price[TEST_INDEX_START:]\n", + "generation_eval = generation[TEST_INDEX_START:]\n", + "\n", + "num_eval_timesteps = TEST_INDEX_END - TEST_INDEX_START\n", + "\n", + "eval_sim = BuildingSimulation(electricity_load_profile=load_eval,\n", + " solar_generation_profile=generation_eval,\n", + " electricity_price=price_eval,\n", + " max_battery_charge_per_timestep=100, \n", + " battery_capacity=400)\n", + "\n", + "eval_env = Environment(eval_sim, num_forecasting_steps=NUM_FORECAST_STEPS, max_timesteps=num_eval_timesteps)\n", + "eval_env = ObservationWrapper(eval_env, NUM_FORECAST_STEPS)\n", + "eval_env = DummyVecEnv([lambda: eval_env])\n", + "# It is important to load the environmental statistics here as we use a rolling mean calculation !\n", + "eval_env = VecNormalize.load(RESULT_PATH + 'env.pkl', eval_env) " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "eval_env.training = False\n", + "\n", + "actions, observations, electricity_consumption, price, rewards = ([], [], [], [], [])\n", + "done = False\n", + "obs = eval_env.reset()\n", + "while not done:\n", + " action = model.predict(obs, deterministic=True)\n", + " obs, r, done, info = eval_env.step([action[0][0]])\n", + "\n", + " actions.append(action[0][0][0])\n", + " original_reward = eval_env.get_original_reward()[0]\n", + " original_obs = eval_env.get_original_obs()[0]\n", + " observations.append(original_obs)\n", + " electricity_consumption.append(info[0]['electricity_consumption'])\n", + " 
price.append(info[0]['electricity_price'])\n", + " rewards.append(r)\n", + " \n", + "trajectory = pd.DataFrame({\n", + " 'action': actions,\n", + " 'observations': observations,\n", + " 'electricity_consumption': electricity_consumption,\n", + " 'electricity_price': price,\n", + " 'reward': rewards\n", + " }) " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_data = trajectory[200:500]\n", + "observation_df = plot_data['observations'].apply(pd.Series)\n", + "\n", + "plt.rcParams[\"figure.figsize\"] = (16,10)\n", + "\n", + "fig, ax = plt.subplots()\n", + "ax.plot(observation_df[1], label = 'Residual Load')\n", + "ax.plot(plot_data['electricity_price'], label = 'Electricity Price')\n", + "\n", + "ax1 = ax.twinx()\n", + "ax1.plot(plot_data['action'], label = 'action', color = 'black')\n", + "fig.legend(bbox_to_anchor=[0.5, 0.95], loc = 'center', ncol=5, prop={'size': 16})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Compare to Baseline" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "eval_env.training = False\n", + "\n", + "cost = []\n", + "done = False\n", + "obs = eval_env.reset()\n", + "while not done:\n", + " action = model.predict(obs, deterministic=True)\n", + " obs, r, done, info = eval_env.step([action[0][0]])\n", + " cost.append(info[0]['electricity_consumption'] * info[0]['electricity_price'])\n", + "\n", + "cost = sum(cost)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "eval_env.training = False\n", + "\n", + "baseline_cost = []\n", + "done = False\n", + "obs = eval_env.reset()\n", + "while not done:\n", + " # Always taking noop as action. 
This is the electricity demand if there would be no battery\n", + " action = [0]\n", + " obs, r, done, info = eval_env.step(action)\n", + " baseline_cost.append(info[0]['electricity_consumption'] * info[0]['electricity_price'])\n", + "\n", + "baseline_cost = sum(baseline_cost)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-0.0012597516984353962" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# how much energy did we save by utilizing the battery?\n", + "1 - (cost / baseline_cost)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9588993.273488251" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "baseline_cost" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9601073.024050813" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cost" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From f7a65fc56cd31db91e1c14128925ce6ccd63930a Mon Sep 17 00:00:00 2001 From: tobirohrer Date: Fri, 3 Nov 2023 15:38:02 +0100 Subject: [PATCH 2/4] Added MPC and OPC solutions and added more comments --- README.md | 16 + example_solutions/helper.py | 3 + example_solutions/model_predictive_control.py | 79 ++ example_solutions/observation_wrapper.py | 3 + 
example_solutions/optimal_control_problem.py | 81 ++ ...ement_learning_sample_implementation.ipynb | 1244 +---------------- requirements.txt | 3 +- 7 files changed, 219 insertions(+), 1210 deletions(-) create mode 100644 example_solutions/model_predictive_control.py diff --git a/README.md b/README.md index 2f990f2..89c0853 100644 --- a/README.md +++ b/README.md @@ -97,6 +97,22 @@ The length of the forecast can be defined by setting the parameter `num_forecast The episode ends if the `max_timesteps` of the `Environment()` are reached. +## Example Solutions + +The folder [example_solutions](example_solutions) contains three different example solutions to the problem +described above. + +1. By applying deep reinforcement learning using the framework [stable-baselines3](https://github.com/DLR-RM/stable-baselines3). +2. By formulating the problem as an optimal control problem (OCP) using [pyomo](http://www.pyomo.org/). In this case, it + is assumed that the forecast for the price, load and generation data for the whole period is available. +3. By model predictive control, which solves the optimal control problem formulation from 2. in each time step in a closed-loop manner. + In contrast to 2., only a forecast of a fixed length is given in each iteration. + +Note that the execution of the example solutions requires additional dependencies which are not specified inside `setup.py`. +Therefore, make sure to install the required Python packages defined in `requirements.txt`. Additionally, an installation +of the `ipopt` solver is required in order to solve the optimal control problem +(by using conda, simply run `conda install -c conda-forge ipopt`). 
+ ## Code Documentation The documentation is available at [https://building-energy-storage-simulation.readthedocs.io/](https://building-energy-storage-simulation.readthedocs.io/en/master/) diff --git a/example_solutions/helper.py b/example_solutions/helper.py index 21e20df..173dde3 100644 --- a/example_solutions/helper.py +++ b/example_solutions/helper.py @@ -6,6 +6,9 @@ TEST_INDEX_START = 4380 TEST_INDEX_END = 8500 +BATTERY_CAPACITY = 400 +BATTERY_POWER = 100 + def read_data() -> Tuple[np.ndarray, np.ndarray, np.ndarray]: load = pd.read_csv('../building_energy_storage_simulation/data/preprocessed/electricity_load_profile.csv')[ diff --git a/example_solutions/model_predictive_control.py b/example_solutions/model_predictive_control.py new file mode 100644 index 0000000..e861265 --- /dev/null +++ b/example_solutions/model_predictive_control.py @@ -0,0 +1,79 @@ +import pyomo.environ as pyo +import numpy as np +import matplotlib.pyplot as plt + +from building_energy_storage_simulation import BuildingSimulation, Environment +from optimal_control_problem import build_optimization_problem +from helper import read_data, TEST_INDEX_END, TEST_INDEX_START, BATTERY_POWER, BATTERY_CAPACITY + +FORECAST_LENGTH = 24 + + +def normalize_to_minus_one_to_one(x, min_value, max_value): + return -1 + 2 * (x - min_value) / (max_value - min_value) + + +solver = pyo.SolverFactory('ipopt') + +load, price, generation = read_data() +load_eval = load[TEST_INDEX_START:] +price_eval = price[TEST_INDEX_START:] +generation_eval = generation[TEST_INDEX_START:] + +num_eval_timesteps = TEST_INDEX_END - TEST_INDEX_START + +sim = BuildingSimulation(electricity_load_profile=load_eval, + solar_generation_profile=generation_eval, + electricity_price=price_eval, + max_battery_charge_per_timestep=BATTERY_POWER, + battery_capacity=BATTERY_CAPACITY) +env = Environment(sim, num_forecasting_steps=FORECAST_LENGTH, max_timesteps=num_eval_timesteps) + +obs, info = env.reset() +done = False + +actions, 
residual_loads, prices = (np.array([]), np.array([]), np.array([])) + +t = 0 +while not done: + load_forecast = obs[1: FORECAST_LENGTH + 1] + generation_forecast = obs[FORECAST_LENGTH + 1: 2 * FORECAST_LENGTH + 1] + price_forecast = obs[2 * FORECAST_LENGTH + 1: 3 * FORECAST_LENGTH + 1] + residual_load_forecast = load_forecast - generation_forecast + soc = obs[0] + + instance = build_optimization_problem(residual_fixed_load=residual_load_forecast, + price=price_forecast, + soc=soc / BATTERY_CAPACITY * 100, # Convert SOC due to different SOC definitions + battery_capacity=BATTERY_CAPACITY, + battery_power=BATTERY_POWER) + + solver.solve(instance, tee=True) + action = pyo.value(instance.power[0]) + actions = np.append(actions, action) + obs, reward, done, _, info = env.step(normalize_to_minus_one_to_one(action, -1 * BATTERY_POWER, BATTERY_POWER)) + residual_loads = np.append(residual_loads, residual_load_forecast[0]) + prices = np.append(prices, price_forecast[0]) + t += 1 + +baseline_cost = sum(residual_loads[residual_loads > 0] * prices[residual_loads > 0]) +augmented_load = residual_loads + actions +cost = sum(augmented_load[augmented_load > 0] * prices[augmented_load > 0]) + +print('baseline cost: ' + str(baseline_cost)) +print('cost: ' + str(cost)) +print('savings in %: ' + str(cost/baseline_cost)) + +time = range(len(actions)) + +fig1 = plt.figure() +ax = plt.subplot() +ax.plot(residual_loads, label='Residual Load') +ax.plot(residual_loads + actions, label='Augmented Load') +ax.plot(actions, label='Battery Power Applied') +ax.plot(prices, '--', label='Price') +plt.ylabel('Load and Battery Power Applied (kW) & Price (Cent per kWh)') +plt.xlabel('Time Step') +ax.legend() +ax.grid() +plt.show() diff --git a/example_solutions/observation_wrapper.py b/example_solutions/observation_wrapper.py index 382ecbd..9a3f37f 100644 --- a/example_solutions/observation_wrapper.py +++ b/example_solutions/observation_wrapper.py @@ -3,6 +3,9 @@ class 
ObservationWrapper(gymnasium.Wrapper): + """ + Combines generation and load into one variable to reduce dimensionality of the observation space. + """ def __init__(self, env, forecast_length): super().__init__(env) diff --git a/example_solutions/optimal_control_problem.py b/example_solutions/optimal_control_problem.py index e69de29..eac5764 100644 --- a/example_solutions/optimal_control_problem.py +++ b/example_solutions/optimal_control_problem.py @@ -0,0 +1,81 @@ +import pyomo.environ as pyo +import numpy as np +import matplotlib.pyplot as plt + +from helper import read_data, TEST_INDEX_END, TEST_INDEX_START, BATTERY_CAPACITY, BATTERY_POWER + +DELTA_TIME_HOURS = 1 + + +def build_optimization_problem(residual_fixed_load, price, soc, battery_power, battery_capacity): # returns a concrete Pyomo instance; soc is in percent (0-100), power is bounded to +/- battery_power + # model parameter initialization + time = range(len(residual_fixed_load)) + soc_time = range(len(residual_fixed_load) + 1) + max_power_charge = battery_power + max_power_discharge = -1 * battery_power + max_soc = 100 + min_soc = 0 + soc_init = soc + energy_capacity = battery_capacity + + m = pyo.AbstractModel() + m.power = pyo.Var(time, domain=pyo.Reals, bounds=(max_power_discharge, max_power_charge)) + m.soc = pyo.Var(soc_time, bounds=(min_soc, max_soc)) + + def obj_expression(m): + return sum([price[i] * pyo.log(1 + pyo.exp((m.power[i] + residual_fixed_load[i]))) for i in time]) # softplus: smooth approximation of max(0, net load), so mainly grid import is priced + + m.OBJ = pyo.Objective(rule=obj_expression, sense=pyo.minimize) + + def soc_start_rule(m): + return m.soc[0] == soc_init + + m.soc_start = pyo.Constraint(rule=soc_start_rule) + + def soc_constraint_rule(m, i): + return m.soc[i + 1] == float(100) * DELTA_TIME_HOURS * (m.power[i]) / energy_capacity + m.soc[i] + + m.soc_constraints = pyo.Constraint(time, rule=soc_constraint_rule) + + return m.create_instance() + + +if __name__ == "__main__": + solver = pyo.SolverFactory('ipopt') + + load, price, generation = read_data() + + load_eval = load[TEST_INDEX_START:TEST_INDEX_END] + price_eval = price[TEST_INDEX_START:TEST_INDEX_END]
+ generation_eval = generation[TEST_INDEX_START:TEST_INDEX_END] + + residual_fixed_load_eval = load_eval - generation_eval + time = range(len(residual_fixed_load_eval)) + + m = build_optimization_problem(residual_fixed_load_eval, + price_eval, + soc=0, + battery_power=BATTERY_POWER, + battery_capacity=BATTERY_CAPACITY) + solver.solve(m, tee=True) + t = [time[i] * DELTA_TIME_HOURS for i in time] + + baseline_cost = sum(residual_fixed_load_eval[residual_fixed_load_eval > 0] * price_eval[residual_fixed_load_eval > 0]) + augmented_load = residual_fixed_load_eval + np.array([(pyo.value(m.power[i])) for i in time]) + cost = sum(augmented_load[augmented_load > 0] * price_eval[augmented_load > 0]) + + print('baseline cost: ' + str(baseline_cost)) + print('cost: ' + str(cost)) + print('savings in %: ' + str((1 - cost / baseline_cost) * 100)) # relative cost reduction vs. the no-battery baseline + + fig1 = plt.figure() + ax = plt.subplot() + ax.plot([(residual_fixed_load_eval[i]) for i in time], label='Residual Load') + ax.plot(augmented_load, label='Augmented Load') + ax.plot(price_eval, '--', label='Price') + ax.plot([(pyo.value(m.power[i])) for i in time], label='Battery Power') + plt.ylabel('Load and Battery Power Applied (kW) & Price (Cent per kWh)') + plt.xlabel('Time Step') + ax.legend() + ax.grid() + plt.show() diff --git a/example_solutions/reinforcement_learning_sample_implementation.ipynb b/example_solutions/reinforcement_learning_sample_implementation.ipynb index d301724..912e994 100644 --- a/example_solutions/reinforcement_learning_sample_implementation.ipynb +++ b/example_solutions/reinforcement_learning_sample_implementation.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -20,7 +20,7 @@ "from building_energy_storage_simulation import BuildingSimulation, Environment\n", "\n", "from observation_wrapper import ObservationWrapper\n", - "from helper import read_data, TEST_INDEX_START, TEST_INDEX_END" + "from helper import
read_data, TEST_INDEX_START, TEST_INDEX_END, BATTERY_CAPACITY, BATTERY_POWER" ] }, { @@ -32,19 +32,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[ 0. 9.89 9.08 8.22 8.57 8.93 9.2 10.71\n", - " 3.98309 5.005 4.133 4.322 4.546 3.767 3.97 4.059\n", - " 4.326 ]\n" - ] - } - ], + "outputs": [], "source": [ "NUM_FORECAST_STEPS = 8\n", "RESULT_PATH = 'rl_example/'\n", @@ -60,8 +50,8 @@ "sim = BuildingSimulation(electricity_load_profile=load_train,\n", " solar_generation_profile=generation_train,\n", " electricity_price=price_train,\n", - " max_battery_charge_per_timestep=100,\n", - " battery_capacity=400)\n", + " max_battery_charge_per_timestep=BATTERY_POWER,\n", + " battery_capacity=BATTERY_CAPACITY)\n", "\n", "env = Environment(sim, num_forecasting_steps=NUM_FORECAST_STEPS, max_timesteps=len(load_train)-NUM_FORECAST_STEPS)\n", "# ObservationWrapper combines forecast of load and generation to one residual load forecast\n", @@ -72,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -85,1087 +75,12 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using cpu device\n", - "-----------------------------\n", - "| time/ | |\n", - "| fps | 1846 |\n", - "| iterations | 1 |\n", - "| time_elapsed | 1 |\n", - "| total_timesteps | 2048 |\n", - "-----------------------------\n", - "------------------------------------------\n", - "| time/ | |\n", - "| fps | 1387 |\n", - "| iterations | 2 |\n", - "| time_elapsed | 2 |\n", - "| total_timesteps | 4096 |\n", - "| train/ | |\n", - "| approx_kl | 0.0036777142 |\n", - "| clip_fraction | 0.0229 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.42 |\n", - "| explained_variance | -0.708 |\n", - "| 
learning_rate | 0.0003 |\n", - "| loss | 0.000247 |\n", - "| n_updates | 10 |\n", - "| policy_gradient_loss | -0.00348 |\n", - "| std | 0.995 |\n", - "| value_loss | 0.234 |\n", - "------------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -5.1e+06 |\n", - "| time/ | |\n", - "| fps | 1261 |\n", - "| iterations | 3 |\n", - "| time_elapsed | 4 |\n", - "| total_timesteps | 6144 |\n", - "| train/ | |\n", - "| approx_kl | 0.0045435634 |\n", - "| clip_fraction | 0.0198 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.4 |\n", - "| explained_variance | 0.323 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.00283 |\n", - "| n_updates | 20 |\n", - "| policy_gradient_loss | -0.00426 |\n", - "| std | 0.97 |\n", - "| value_loss | 0.0201 |\n", - "------------------------------------------\n", - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -5.1e+06 |\n", - "| time/ | |\n", - "| fps | 1205 |\n", - "| iterations | 4 |\n", - "| time_elapsed | 6 |\n", - "| total_timesteps | 8192 |\n", - "| train/ | |\n", - "| approx_kl | 0.004573004 |\n", - "| clip_fraction | 0.0329 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.4 |\n", - "| explained_variance | 0.611 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.00737 |\n", - "| n_updates | 30 |\n", - "| policy_gradient_loss | -0.00455 |\n", - "| std | 0.984 |\n", - "| value_loss | 0.0243 |\n", - "-----------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -5e+06 |\n", - "| time/ | |\n", - "| fps | 1169 |\n", - "| iterations | 5 |\n", - "| time_elapsed | 8 |\n", - "| total_timesteps | 10240 |\n", - "| train/ | |\n", - "| approx_kl | 0.0037469426 |\n", - "| clip_fraction | 0.0484 |\n", - "| clip_range | 0.2 |\n", - "| 
entropy_loss | -1.41 |\n", - "| explained_variance | 0.57 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0261 |\n", - "| n_updates | 40 |\n", - "| policy_gradient_loss | -0.00724 |\n", - "| std | 0.99 |\n", - "| value_loss | 0.00943 |\n", - "------------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -5e+06 |\n", - "| time/ | |\n", - "| fps | 1148 |\n", - "| iterations | 6 |\n", - "| time_elapsed | 10 |\n", - "| total_timesteps | 12288 |\n", - "| train/ | |\n", - "| approx_kl | 0.0058725784 |\n", - "| clip_fraction | 0.0702 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.42 |\n", - "| explained_variance | 0.751 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0108 |\n", - "| n_updates | 50 |\n", - "| policy_gradient_loss | -0.00806 |\n", - "| std | 1 |\n", - "| value_loss | 0.0147 |\n", - "------------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.93e+06 |\n", - "| time/ | |\n", - "| fps | 1139 |\n", - "| iterations | 7 |\n", - "| time_elapsed | 12 |\n", - "| total_timesteps | 14336 |\n", - "| train/ | |\n", - "| approx_kl | 0.0058474382 |\n", - "| clip_fraction | 0.0539 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.42 |\n", - "| explained_variance | 0.635 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.00986 |\n", - "| n_updates | 60 |\n", - "| policy_gradient_loss | -0.00606 |\n", - "| std | 0.999 |\n", - "| value_loss | 0.00742 |\n", - "------------------------------------------\n", - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.93e+06 |\n", - "| time/ | |\n", - "| fps | 1132 |\n", - "| iterations | 8 |\n", - "| time_elapsed | 14 |\n", - "| total_timesteps | 16384 |\n", - "| train/ | |\n", - "| approx_kl | 0.003978129 
|\n", - "| clip_fraction | 0.025 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.42 |\n", - "| explained_variance | 0.82 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0199 |\n", - "| n_updates | 70 |\n", - "| policy_gradient_loss | -0.00461 |\n", - "| std | 1 |\n", - "| value_loss | 0.00987 |\n", - "-----------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.89e+06 |\n", - "| time/ | |\n", - "| fps | 1127 |\n", - "| iterations | 9 |\n", - "| time_elapsed | 16 |\n", - "| total_timesteps | 18432 |\n", - "| train/ | |\n", - "| approx_kl | 0.0047321245 |\n", - "| clip_fraction | 0.0652 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.4 |\n", - "| explained_variance | 0.685 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0133 |\n", - "| n_updates | 80 |\n", - "| policy_gradient_loss | -0.00815 |\n", - "| std | 0.97 |\n", - "| value_loss | 0.00682 |\n", - "------------------------------------------\n", - "----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.89e+06 |\n", - "| time/ | |\n", - "| fps | 1125 |\n", - "| iterations | 10 |\n", - "| time_elapsed | 18 |\n", - "| total_timesteps | 20480 |\n", - "| train/ | |\n", - "| approx_kl | 0.00393809 |\n", - "| clip_fraction | 0.0249 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.38 |\n", - "| explained_variance | 0.846 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0126 |\n", - "| n_updates | 90 |\n", - "| policy_gradient_loss | -0.00404 |\n", - "| std | 0.955 |\n", - "| value_loss | 0.00849 |\n", - "----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.85e+06 |\n", - "| time/ | |\n", - "| fps | 1122 |\n", 
- "| iterations | 11 |\n", - "| time_elapsed | 20 |\n", - "| total_timesteps | 22528 |\n", - "| train/ | |\n", - "| approx_kl | 0.0056245844 |\n", - "| clip_fraction | 0.0404 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.37 |\n", - "| explained_variance | 0.752 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.00556 |\n", - "| n_updates | 100 |\n", - "| policy_gradient_loss | -0.00522 |\n", - "| std | 0.941 |\n", - "| value_loss | 0.00499 |\n", - "------------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.85e+06 |\n", - "| time/ | |\n", - "| fps | 1119 |\n", - "| iterations | 12 |\n", - "| time_elapsed | 21 |\n", - "| total_timesteps | 24576 |\n", - "| train/ | |\n", - "| approx_kl | 0.0032175046 |\n", - "| clip_fraction | 0.0312 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.35 |\n", - "| explained_variance | 0.827 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.00478 |\n", - "| n_updates | 110 |\n", - "| policy_gradient_loss | -0.00691 |\n", - "| std | 0.935 |\n", - "| value_loss | 0.00943 |\n", - "------------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.83e+06 |\n", - "| time/ | |\n", - "| fps | 1110 |\n", - "| iterations | 13 |\n", - "| time_elapsed | 23 |\n", - "| total_timesteps | 26624 |\n", - "| train/ | |\n", - "| approx_kl | 0.0033950265 |\n", - "| clip_fraction | 0.0379 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.35 |\n", - "| explained_variance | 0.738 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.00281 |\n", - "| n_updates | 120 |\n", - "| policy_gradient_loss | -0.00417 |\n", - "| std | 0.928 |\n", - "| value_loss | 0.00453 |\n", - "------------------------------------------\n", - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 
4.37e+03 |\n", - "| ep_rew_mean | -4.83e+06 |\n", - "| time/ | |\n", - "| fps | 1054 |\n", - "| iterations | 14 |\n", - "| time_elapsed | 27 |\n", - "| total_timesteps | 28672 |\n", - "| train/ | |\n", - "| approx_kl | 0.005857446 |\n", - "| clip_fraction | 0.0508 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.33 |\n", - "| explained_variance | 0.841 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.00559 |\n", - "| n_updates | 130 |\n", - "| policy_gradient_loss | -0.00754 |\n", - "| std | 0.905 |\n", - "| value_loss | 0.00871 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.79e+06 |\n", - "| time/ | |\n", - "| fps | 1010 |\n", - "| iterations | 15 |\n", - "| time_elapsed | 30 |\n", - "| total_timesteps | 30720 |\n", - "| train/ | |\n", - "| approx_kl | 0.005098461 |\n", - "| clip_fraction | 0.0383 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.31 |\n", - "| explained_variance | 0.684 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0102 |\n", - "| n_updates | 140 |\n", - "| policy_gradient_loss | -0.00649 |\n", - "| std | 0.895 |\n", - "| value_loss | 0.00368 |\n", - "-----------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.79e+06 |\n", - "| time/ | |\n", - "| fps | 996 |\n", - "| iterations | 16 |\n", - "| time_elapsed | 32 |\n", - "| total_timesteps | 32768 |\n", - "| train/ | |\n", - "| approx_kl | 0.0066045905 |\n", - "| clip_fraction | 0.0508 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.3 |\n", - "| explained_variance | 0.862 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.01 |\n", - "| n_updates | 150 |\n", - "| policy_gradient_loss | -0.00624 |\n", - "| std | 0.885 |\n", - "| value_loss | 0.00755 |\n", - "------------------------------------------\n", - 
"-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.79e+06 |\n", - "| time/ | |\n", - "| fps | 999 |\n", - "| iterations | 17 |\n", - "| time_elapsed | 34 |\n", - "| total_timesteps | 34816 |\n", - "| train/ | |\n", - "| approx_kl | 0.004595418 |\n", - "| clip_fraction | 0.0344 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.28 |\n", - "| explained_variance | 0.662 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.00958 |\n", - "| n_updates | 160 |\n", - "| policy_gradient_loss | -0.0061 |\n", - "| std | 0.859 |\n", - "| value_loss | 0.00385 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.76e+06 |\n", - "| time/ | |\n", - "| fps | 1002 |\n", - "| iterations | 18 |\n", - "| time_elapsed | 36 |\n", - "| total_timesteps | 36864 |\n", - "| train/ | |\n", - "| approx_kl | 0.008695626 |\n", - "| clip_fraction | 0.0789 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.25 |\n", - "| explained_variance | 0.84 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0146 |\n", - "| n_updates | 170 |\n", - "| policy_gradient_loss | -0.00832 |\n", - "| std | 0.835 |\n", - "| value_loss | 0.00851 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.76e+06 |\n", - "| time/ | |\n", - "| fps | 1004 |\n", - "| iterations | 19 |\n", - "| time_elapsed | 38 |\n", - "| total_timesteps | 38912 |\n", - "| train/ | |\n", - "| approx_kl | 0.004202239 |\n", - "| clip_fraction | 0.0506 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.23 |\n", - "| explained_variance | 0.875 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0112 |\n", - "| n_updates | 180 |\n", - "| policy_gradient_loss | -0.00643 |\n", - "| std | 0.826 |\n", - "| 
value_loss | 0.0049 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.72e+06 |\n", - "| time/ | |\n", - "| fps | 1006 |\n", - "| iterations | 20 |\n", - "| time_elapsed | 40 |\n", - "| total_timesteps | 40960 |\n", - "| train/ | |\n", - "| approx_kl | 0.0056182286 |\n", - "| clip_fraction | 0.044 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.21 |\n", - "| explained_variance | 0.735 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0388 |\n", - "| n_updates | 190 |\n", - "| policy_gradient_loss | -0.00686 |\n", - "| std | 0.801 |\n", - "| value_loss | 0.00517 |\n", - "------------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.72e+06 |\n", - "| time/ | |\n", - "| fps | 1009 |\n", - "| iterations | 21 |\n", - "| time_elapsed | 42 |\n", - "| total_timesteps | 43008 |\n", - "| train/ | |\n", - "| approx_kl | 0.0044678794 |\n", - "| clip_fraction | 0.0564 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.2 |\n", - "| explained_variance | 0.893 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.00494 |\n", - "| n_updates | 200 |\n", - "| policy_gradient_loss | -0.00672 |\n", - "| std | 0.803 |\n", - "| value_loss | 0.00637 |\n", - "------------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.68e+06 |\n", - "| time/ | |\n", - "| fps | 987 |\n", - "| iterations | 22 |\n", - "| time_elapsed | 45 |\n", - "| total_timesteps | 45056 |\n", - "| train/ | |\n", - "| approx_kl | 0.0033513391 |\n", - "| clip_fraction | 0.0302 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.19 |\n", - "| explained_variance | 0.765 |\n", - 
"| learning_rate | 0.0003 |\n", - "| loss | -0.00761 |\n", - "| n_updates | 210 |\n", - "| policy_gradient_loss | -0.0038 |\n", - "| std | 0.794 |\n", - "| value_loss | 0.00508 |\n", - "------------------------------------------\n", - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.68e+06 |\n", - "| time/ | |\n", - "| fps | 964 |\n", - "| iterations | 23 |\n", - "| time_elapsed | 48 |\n", - "| total_timesteps | 47104 |\n", - "| train/ | |\n", - "| approx_kl | 0.004656489 |\n", - "| clip_fraction | 0.0439 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.18 |\n", - "| explained_variance | 0.908 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0369 |\n", - "| n_updates | 220 |\n", - "| policy_gradient_loss | -0.00686 |\n", - "| std | 0.779 |\n", - "| value_loss | 0.00658 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.64e+06 |\n", - "| time/ | |\n", - "| fps | 962 |\n", - "| iterations | 24 |\n", - "| time_elapsed | 51 |\n", - "| total_timesteps | 49152 |\n", - "| train/ | |\n", - "| approx_kl | 0.005987567 |\n", - "| clip_fraction | 0.05 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.16 |\n", - "| explained_variance | 0.723 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.00145 |\n", - "| n_updates | 230 |\n", - "| policy_gradient_loss | -0.00726 |\n", - "| std | 0.765 |\n", - "| value_loss | 0.00448 |\n", - "-----------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.64e+06 |\n", - "| time/ | |\n", - "| fps | 966 |\n", - "| iterations | 25 |\n", - "| time_elapsed | 52 |\n", - "| total_timesteps | 51200 |\n", - "| train/ | |\n", - "| approx_kl | 0.0054580546 |\n", - "| clip_fraction | 0.043 |\n", - "| clip_range | 0.2 
|\n", - "| entropy_loss | -1.15 |\n", - "| explained_variance | 0.901 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.00864 |\n", - "| n_updates | 240 |\n", - "| policy_gradient_loss | -0.00724 |\n", - "| std | 0.766 |\n", - "| value_loss | 0.00696 |\n", - "------------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.61e+06 |\n", - "| time/ | |\n", - "| fps | 970 |\n", - "| iterations | 26 |\n", - "| time_elapsed | 54 |\n", - "| total_timesteps | 53248 |\n", - "| train/ | |\n", - "| approx_kl | 0.0048291944 |\n", - "| clip_fraction | 0.0397 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.14 |\n", - "| explained_variance | 0.754 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0127 |\n", - "| n_updates | 250 |\n", - "| policy_gradient_loss | -0.0047 |\n", - "| std | 0.745 |\n", - "| value_loss | 0.00429 |\n", - "------------------------------------------\n", - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.61e+06 |\n", - "| time/ | |\n", - "| fps | 973 |\n", - "| iterations | 27 |\n", - "| time_elapsed | 56 |\n", - "| total_timesteps | 55296 |\n", - "| train/ | |\n", - "| approx_kl | 0.006914062 |\n", - "| clip_fraction | 0.0762 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.12 |\n", - "| explained_variance | 0.898 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.0234 |\n", - "| n_updates | 260 |\n", - "| policy_gradient_loss | -0.00764 |\n", - "| std | 0.739 |\n", - "| value_loss | 0.00753 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.58e+06 |\n", - "| time/ | |\n", - "| fps | 976 |\n", - "| iterations | 28 |\n", - "| time_elapsed | 58 |\n", - "| total_timesteps | 57344 |\n", - "| train/ | |\n", - "| 
approx_kl | 0.004374048 |\n", - "| clip_fraction | 0.0495 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.11 |\n", - "| explained_variance | 0.734 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.00411 |\n", - "| n_updates | 270 |\n", - "| policy_gradient_loss | -0.00574 |\n", - "| std | 0.732 |\n", - "| value_loss | 0.00341 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.58e+06 |\n", - "| time/ | |\n", - "| fps | 979 |\n", - "| iterations | 29 |\n", - "| time_elapsed | 60 |\n", - "| total_timesteps | 59392 |\n", - "| train/ | |\n", - "| approx_kl | 0.006090526 |\n", - "| clip_fraction | 0.0544 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.1 |\n", - "| explained_variance | 0.896 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0111 |\n", - "| n_updates | 280 |\n", - "| policy_gradient_loss | -0.00702 |\n", - "| std | 0.722 |\n", - "| value_loss | 0.00728 |\n", - "-----------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.55e+06 |\n", - "| time/ | |\n", - "| fps | 983 |\n", - "| iterations | 30 |\n", - "| time_elapsed | 62 |\n", - "| total_timesteps | 61440 |\n", - "| train/ | |\n", - "| approx_kl | 0.0043111267 |\n", - "| clip_fraction | 0.0461 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.08 |\n", - "| explained_variance | 0.72 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0198 |\n", - "| n_updates | 290 |\n", - "| policy_gradient_loss | -0.00596 |\n", - "| std | 0.705 |\n", - "| value_loss | 0.00319 |\n", - "------------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.55e+06 |\n", - "| 
time/ | |\n", - "| fps | 979 |\n", - "| iterations | 31 |\n", - "| time_elapsed | 64 |\n", - "| total_timesteps | 63488 |\n", - "| train/ | |\n", - "| approx_kl | 0.0050121583 |\n", - "| clip_fraction | 0.0552 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.06 |\n", - "| explained_variance | 0.893 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.00695 |\n", - "| n_updates | 300 |\n", - "| policy_gradient_loss | -0.00873 |\n", - "| std | 0.696 |\n", - "| value_loss | 0.0067 |\n", - "------------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.55e+06 |\n", - "| time/ | |\n", - "| fps | 965 |\n", - "| iterations | 32 |\n", - "| time_elapsed | 67 |\n", - "| total_timesteps | 65536 |\n", - "| train/ | |\n", - "| approx_kl | 0.0067488514 |\n", - "| clip_fraction | 0.0677 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.05 |\n", - "| explained_variance | 0.653 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0191 |\n", - "| n_updates | 310 |\n", - "| policy_gradient_loss | -0.00957 |\n", - "| std | 0.687 |\n", - "| value_loss | 0.00315 |\n", - "------------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.52e+06 |\n", - "| time/ | |\n", - "| fps | 950 |\n", - "| iterations | 33 |\n", - "| time_elapsed | 71 |\n", - "| total_timesteps | 67584 |\n", - "| train/ | |\n", - "| approx_kl | 0.0039351527 |\n", - "| clip_fraction | 0.0503 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.04 |\n", - "| explained_variance | 0.89 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.011 |\n", - "| n_updates | 320 |\n", - "| policy_gradient_loss | -0.00724 |\n", - "| std | 0.681 |\n", - "| value_loss | 0.00592 |\n", - "------------------------------------------\n", - "------------------------------------------\n", - "| 
rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.52e+06 |\n", - "| time/ | |\n", - "| fps | 951 |\n", - "| iterations | 34 |\n", - "| time_elapsed | 73 |\n", - "| total_timesteps | 69632 |\n", - "| train/ | |\n", - "| approx_kl | 0.0057587875 |\n", - "| clip_fraction | 0.0638 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -1.02 |\n", - "| explained_variance | 0.652 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.00516 |\n", - "| n_updates | 330 |\n", - "| policy_gradient_loss | -0.0065 |\n", - "| std | 0.665 |\n", - "| value_loss | 0.00255 |\n", - "------------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.49e+06 |\n", - "| time/ | |\n", - "| fps | 954 |\n", - "| iterations | 35 |\n", - "| time_elapsed | 75 |\n", - "| total_timesteps | 71680 |\n", - "| train/ | |\n", - "| approx_kl | 0.0055833566 |\n", - "| clip_fraction | 0.0664 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.996 |\n", - "| explained_variance | 0.798 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.0045 |\n", - "| n_updates | 340 |\n", - "| policy_gradient_loss | -0.00726 |\n", - "| std | 0.649 |\n", - "| value_loss | 0.00423 |\n", - "------------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.49e+06 |\n", - "| time/ | |\n", - "| fps | 958 |\n", - "| iterations | 36 |\n", - "| time_elapsed | 76 |\n", - "| total_timesteps | 73728 |\n", - "| train/ | |\n", - "| approx_kl | 0.0051649846 |\n", - "| clip_fraction | 0.0429 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.993 |\n", - "| explained_variance | 0.899 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0105 |\n", - "| n_updates | 350 |\n", - "| policy_gradient_loss | -0.00616 |\n", - "| std | 0.657 |\n", - "| value_loss | 0.00457 |\n", - 
"------------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.47e+06 |\n", - "| time/ | |\n", - "| fps | 961 |\n", - "| iterations | 37 |\n", - "| time_elapsed | 78 |\n", - "| total_timesteps | 75776 |\n", - "| train/ | |\n", - "| approx_kl | 0.0059529413 |\n", - "| clip_fraction | 0.0619 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.995 |\n", - "| explained_variance | 0.776 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0184 |\n", - "| n_updates | 360 |\n", - "| policy_gradient_loss | -0.00641 |\n", - "| std | 0.652 |\n", - "| value_loss | 0.00389 |\n", - "------------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.47e+06 |\n", - "| time/ | |\n", - "| fps | 963 |\n", - "| iterations | 38 |\n", - "| time_elapsed | 80 |\n", - "| total_timesteps | 77824 |\n", - "| train/ | |\n", - "| approx_kl | 0.005639543 |\n", - "| clip_fraction | 0.0458 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.996 |\n", - "| explained_variance | 0.915 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.011 |\n", - "| n_updates | 370 |\n", - "| policy_gradient_loss | -0.00759 |\n", - "| std | 0.655 |\n", - "| value_loss | 0.00563 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.45e+06 |\n", - "| time/ | |\n", - "| fps | 966 |\n", - "| iterations | 39 |\n", - "| time_elapsed | 82 |\n", - "| total_timesteps | 79872 |\n", - "| train/ | |\n", - "| approx_kl | 0.006792381 |\n", - "| clip_fraction | 0.0622 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.99 |\n", - "| explained_variance | 0.756 |\n", - "| learning_rate | 0.0003 |\n", 
- "| loss | -0.0247 |\n", - "| n_updates | 380 |\n", - "| policy_gradient_loss | -0.00831 |\n", - "| std | 0.646 |\n", - "| value_loss | 0.00382 |\n", - "-----------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.45e+06 |\n", - "| time/ | |\n", - "| fps | 956 |\n", - "| iterations | 40 |\n", - "| time_elapsed | 85 |\n", - "| total_timesteps | 81920 |\n", - "| train/ | |\n", - "| approx_kl | 0.0076133907 |\n", - "| clip_fraction | 0.0587 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.973 |\n", - "| explained_variance | 0.924 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0182 |\n", - "| n_updates | 390 |\n", - "| policy_gradient_loss | -0.00696 |\n", - "| std | 0.636 |\n", - "| value_loss | 0.00579 |\n", - "------------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.42e+06 |\n", - "| time/ | |\n", - "| fps | 944 |\n", - "| iterations | 41 |\n", - "| time_elapsed | 88 |\n", - "| total_timesteps | 83968 |\n", - "| train/ | |\n", - "| approx_kl | 0.0061445124 |\n", - "| clip_fraction | 0.0628 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.964 |\n", - "| explained_variance | 0.754 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0154 |\n", - "| n_updates | 400 |\n", - "| policy_gradient_loss | -0.00757 |\n", - "| std | 0.634 |\n", - "| value_loss | 0.00352 |\n", - "------------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.42e+06 |\n", - "| time/ | |\n", - "| fps | 933 |\n", - "| iterations | 42 |\n", - "| time_elapsed | 92 |\n", - "| total_timesteps | 86016 |\n", - "| train/ | |\n", - "| approx_kl | 0.0058118524 |\n", - "| clip_fraction | 0.0599 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss 
| -0.97 |\n", - "| explained_variance | 0.918 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.024 |\n", - "| n_updates | 410 |\n", - "| policy_gradient_loss | -0.00817 |\n", - "| std | 0.642 |\n", - "| value_loss | 0.00622 |\n", - "------------------------------------------\n", - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.4e+06 |\n", - "| time/ | |\n", - "| fps | 935 |\n", - "| iterations | 43 |\n", - "| time_elapsed | 94 |\n", - "| total_timesteps | 88064 |\n", - "| train/ | |\n", - "| approx_kl | 0.005398696 |\n", - "| clip_fraction | 0.0429 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.953 |\n", - "| explained_variance | 0.77 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.00122 |\n", - "| n_updates | 420 |\n", - "| policy_gradient_loss | -0.00627 |\n", - "| std | 0.618 |\n", - "| value_loss | 0.00296 |\n", - "-----------------------------------------\n", - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.4e+06 |\n", - "| time/ | |\n", - "| fps | 937 |\n", - "| iterations | 44 |\n", - "| time_elapsed | 96 |\n", - "| total_timesteps | 90112 |\n", - "| train/ | |\n", - "| approx_kl | 0.005530538 |\n", - "| clip_fraction | 0.0579 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.943 |\n", - "| explained_variance | 0.909 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0142 |\n", - "| n_updates | 430 |\n", - "| policy_gradient_loss | -0.00727 |\n", - "| std | 0.623 |\n", - "| value_loss | 0.00661 |\n", - "-----------------------------------------\n", - "------------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.39e+06 |\n", - "| time/ | |\n", - "| fps | 940 |\n", - "| iterations | 45 |\n", - "| time_elapsed | 98 |\n", - "| total_timesteps | 92160 |\n", - "| train/ | |\n", - "| approx_kl | 0.0078549655 |\n", - 
"| clip_fraction | 0.0686 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.947 |\n", - "| explained_variance | 0.765 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.0267 |\n", - "| n_updates | 440 |\n", - "| policy_gradient_loss | -0.00908 |\n", - "| std | 0.625 |\n", - "| value_loss | 0.00305 |\n", - "------------------------------------------\n", - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.39e+06 |\n", - "| time/ | |\n", - "| fps | 940 |\n", - "| iterations | 46 |\n", - "| time_elapsed | 100 |\n", - "| total_timesteps | 94208 |\n", - "| train/ | |\n", - "| approx_kl | 0.005267841 |\n", - "| clip_fraction | 0.0453 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.937 |\n", - "| explained_variance | 0.908 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | -0.00631 |\n", - "| n_updates | 450 |\n", - "| policy_gradient_loss | -0.00533 |\n", - "| std | 0.612 |\n", - "| value_loss | 0.00648 |\n", - "-----------------------------------------\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------------------------\n", - "| rollout/ | |\n", - "| ep_len_mean | 4.37e+03 |\n", - "| ep_rew_mean | -4.37e+06 |\n", - "| time/ | |\n", - "| fps | 930 |\n", - "| iterations | 47 |\n", - "| time_elapsed | 103 |\n", - "| total_timesteps | 96256 |\n", - "| train/ | |\n", - "| approx_kl | 0.007721955 |\n", - "| clip_fraction | 0.086 |\n", - "| clip_range | 0.2 |\n", - "| entropy_loss | -0.919 |\n", - "| explained_variance | 0.753 |\n", - "| learning_rate | 0.0003 |\n", - "| loss | 0.0058 |\n", - "| n_updates | 460 |\n", - "| policy_gradient_loss | -0.00828 |\n", - "| std | 0.601 |\n", - "| value_loss | 0.00257 |\n", - "-----------------------------------------\n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[4], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Train :-)\u001b[39;00m\n\u001b[1;32m 2\u001b[0m model \u001b[38;5;241m=\u001b[39m PPO(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMlpPolicy\u001b[39m\u001b[38;5;124m\"\u001b[39m, env, verbose\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m, gamma\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.95\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlearn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtotal_timesteps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m200000\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# Store the trained Model and environment stats (which are needed as we are standardizing the observations and reward using VecNormalize())\u001b[39;00m\n\u001b[1;32m 5\u001b[0m model\u001b[38;5;241m.\u001b[39msave(RESULT_PATH \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", - "File \u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/stable_baselines3/ppo/ppo.py:308\u001b[0m, in \u001b[0;36mPPO.learn\u001b[0;34m(self, total_timesteps, callback, log_interval, tb_log_name, reset_num_timesteps, progress_bar)\u001b[0m\n\u001b[1;32m 299\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mlearn\u001b[39m(\n\u001b[1;32m 300\u001b[0m \u001b[38;5;28mself\u001b[39m: SelfPPO,\n\u001b[1;32m 301\u001b[0m total_timesteps: \u001b[38;5;28mint\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 306\u001b[0m progress_bar: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 307\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m 
SelfPPO:\n\u001b[0;32m--> 308\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlearn\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 309\u001b[0m \u001b[43m \u001b[49m\u001b[43mtotal_timesteps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtotal_timesteps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 310\u001b[0m \u001b[43m \u001b[49m\u001b[43mcallback\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcallback\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 311\u001b[0m \u001b[43m \u001b[49m\u001b[43mlog_interval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlog_interval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 312\u001b[0m \u001b[43m \u001b[49m\u001b[43mtb_log_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtb_log_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 313\u001b[0m \u001b[43m \u001b[49m\u001b[43mreset_num_timesteps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreset_num_timesteps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 314\u001b[0m \u001b[43m \u001b[49m\u001b[43mprogress_bar\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprogress_bar\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 315\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/stable_baselines3/common/on_policy_algorithm.py:259\u001b[0m, in \u001b[0;36mOnPolicyAlgorithm.learn\u001b[0;34m(self, total_timesteps, callback, log_interval, tb_log_name, reset_num_timesteps, progress_bar)\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39menv \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 258\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnum_timesteps 
\u001b[38;5;241m<\u001b[39m total_timesteps:\n\u001b[0;32m--> 259\u001b[0m continue_training \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcollect_rollouts\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43menv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcallback\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrollout_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_rollout_steps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_steps\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 261\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m continue_training \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m:\n\u001b[1;32m 262\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n", - "File \u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/stable_baselines3/common/on_policy_algorithm.py:169\u001b[0m, in \u001b[0;36mOnPolicyAlgorithm.collect_rollouts\u001b[0;34m(self, env, callback, rollout_buffer, n_rollout_steps)\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m th\u001b[38;5;241m.\u001b[39mno_grad():\n\u001b[1;32m 167\u001b[0m \u001b[38;5;66;03m# Convert to pytorch tensor or to TensorDict\u001b[39;00m\n\u001b[1;32m 168\u001b[0m obs_tensor \u001b[38;5;241m=\u001b[39m obs_as_tensor(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_last_obs, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[0;32m--> 169\u001b[0m actions, values, log_probs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpolicy\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobs_tensor\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 170\u001b[0m actions \u001b[38;5;241m=\u001b[39m 
actions\u001b[38;5;241m.\u001b[39mcpu()\u001b[38;5;241m.\u001b[39mnumpy()\n\u001b[1;32m 172\u001b[0m \u001b[38;5;66;03m# Rescale and perform action\u001b[39;00m\n", - "File \u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/torch/nn/modules/module.py:1518\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1516\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1517\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1518\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/torch/nn/modules/module.py:1527\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1522\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1523\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1524\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1525\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1526\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1527\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1529\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1530\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", - "File \u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/stable_baselines3/common/policies.py:626\u001b[0m, in \u001b[0;36mActorCriticPolicy.forward\u001b[0;34m(self, obs, deterministic)\u001b[0m\n\u001b[1;32m 624\u001b[0m \u001b[38;5;66;03m# Evaluate the values for the given observations\u001b[39;00m\n\u001b[1;32m 625\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvalue_net(latent_vf)\n\u001b[0;32m--> 626\u001b[0m distribution \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_action_dist_from_latent\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlatent_pi\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 627\u001b[0m actions \u001b[38;5;241m=\u001b[39m distribution\u001b[38;5;241m.\u001b[39mget_actions(deterministic\u001b[38;5;241m=\u001b[39mdeterministic)\n\u001b[1;32m 628\u001b[0m log_prob \u001b[38;5;241m=\u001b[39m distribution\u001b[38;5;241m.\u001b[39mlog_prob(actions)\n", - "File 
\u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/stable_baselines3/common/policies.py:656\u001b[0m, in \u001b[0;36mActorCriticPolicy._get_action_dist_from_latent\u001b[0;34m(self, latent_pi)\u001b[0m\n\u001b[1;32m 653\u001b[0m mean_actions \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maction_net(latent_pi)\n\u001b[1;32m 655\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maction_dist, DiagGaussianDistribution):\n\u001b[0;32m--> 656\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43maction_dist\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mproba_distribution\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmean_actions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlog_std\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 657\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maction_dist, CategoricalDistribution):\n\u001b[1;32m 658\u001b[0m \u001b[38;5;66;03m# Here mean_actions are the logits before the softmax\u001b[39;00m\n\u001b[1;32m 659\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maction_dist\u001b[38;5;241m.\u001b[39mproba_distribution(action_logits\u001b[38;5;241m=\u001b[39mmean_actions)\n", - "File \u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/stable_baselines3/common/distributions.py:164\u001b[0m, in \u001b[0;36mDiagGaussianDistribution.proba_distribution\u001b[0;34m(self, mean_actions, log_std)\u001b[0m\n\u001b[1;32m 156\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 157\u001b[0m \u001b[38;5;124;03mCreate the distribution given its parameters (mean, 
std)\u001b[39;00m\n\u001b[1;32m 158\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;124;03m:return:\u001b[39;00m\n\u001b[1;32m 162\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 163\u001b[0m action_std \u001b[38;5;241m=\u001b[39m th\u001b[38;5;241m.\u001b[39mones_like(mean_actions) \u001b[38;5;241m*\u001b[39m log_std\u001b[38;5;241m.\u001b[39mexp()\n\u001b[0;32m--> 164\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdistribution \u001b[38;5;241m=\u001b[39m \u001b[43mNormal\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmean_actions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maction_std\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", - "File \u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/torch/distributions/normal.py:56\u001b[0m, in \u001b[0;36mNormal.__init__\u001b[0;34m(self, loc, scale, validate_args)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 55\u001b[0m batch_shape \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mloc\u001b[38;5;241m.\u001b[39msize()\n\u001b[0;32m---> 56\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mbatch_shape\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalidate_args\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvalidate_args\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/opt/miniconda3/envs/building2/lib/python3.10/site-packages/torch/distributions/distribution.py:75\u001b[0m, in \u001b[0;36mDistribution.__init__\u001b[0;34m(self, batch_shape, event_shape, validate_args)\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
valid\u001b[38;5;241m.\u001b[39mall():\n\u001b[1;32m 68\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 69\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExpected parameter \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mparam\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 70\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(value)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m of shape \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtuple\u001b[39m(value\u001b[38;5;241m.\u001b[39mshape)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbut found invalid values:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mvalue\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 74\u001b[0m )\n\u001b[0;32m---> 75\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m()\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], + "outputs": [], "source": [ "# Train :-)\n", - "model = PPO(\"MlpPolicy\", env, verbose=1, gamma=0.95)\n", + "model = SAC(\"MlpPolicy\", env, verbose=1, gamma=0.95)\n", "model.learn(total_timesteps=200000)\n", "# Store the trained Model and environment stats (which are needed as we are standardizing the observations and reward using VecNormalize())\n", "model.save(RESULT_PATH + 'model')\n", @@ -1174,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ 
-1190,32 +105,11 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "scrolled": true }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Plot the training process\n", "training_log = pd.read_csv(RESULT_PATH + 'monitor.csv', skiprows=1)\n", @@ -1224,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1238,8 +132,8 @@ "eval_sim = BuildingSimulation(electricity_load_profile=load_eval,\n", " solar_generation_profile=generation_eval,\n", " electricity_price=price_eval,\n", - " max_battery_charge_per_timestep=100, \n", - " battery_capacity=400)\n", + " max_battery_charge_per_timestep=BATTERY_POWER, \n", + " battery_capacity=BATTERY_CAPACITY)\n", "\n", "eval_env = Environment(eval_sim, num_forecasting_steps=NUM_FORECAST_STEPS, max_timesteps=num_eval_timesteps)\n", "eval_env = ObservationWrapper(eval_env, NUM_FORECAST_STEPS)\n", @@ -1250,7 +144,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1282,43 +176,26 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "plot_data = trajectory[200:500]\n", "observation_df = plot_data['observations'].apply(pd.Series)\n", - "\n", + "augmented_load = observation_df[1] + plot_data['action'] * BATTERY_POWER\n", "plt.rcParams[\"figure.figsize\"] = (16,10)\n", "\n", - "fig, ax = plt.subplots()\n", - "ax.plot(observation_df[1], label = 'Residual Load')\n", - "ax.plot(plot_data['electricity_price'], label = 'Electricity Price')\n", - "\n", - "ax1 = ax.twinx()\n", - "ax1.plot(plot_data['action'], label = 'action', color = 'black')\n", - "fig.legend(bbox_to_anchor=[0.5, 0.95], loc = 'center', ncol=5, prop={'size': 16})" + "fig1 = plt.figure()\n", + "ax = plt.subplot()\n", + "ax.plot(observation_df[1], label='Residual Load')\n", + "ax.plot(augmented_load, label='Augmented Load')\n", + "ax.plot(plot_data['electricity_price'], '--', label='Price')\n", + "ax.plot(plot_data['action']*50, label='Battery Power')\n", + "plt.ylabel('Load and Battery Power Applied (kW) & Price (Cent per kWh)')\n", + "plt.xlabel('Time Step')\n", + "ax.legend()\n", + "ax.grid()\n", + "plt.show()" ] }, { @@ -1330,7 +207,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1349,7 +226,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1369,64 +246,13 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "-0.0012597516984353962" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# how much energy did we save by utilizing the battery?\n", "1 - (cost / baseline_cost)" ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - 
"9588993.273488251" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "baseline_cost" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "9601073.024050813" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cost" - ] } ], "metadata": { diff --git a/requirements.txt b/requirements.txt index 1b9d3ea..70c38da 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,5 @@ numpy gymnasium sphinx pytest -stable-baselines3 \ No newline at end of file +stable-baselines3 +pyomo \ No newline at end of file From 636f32214e3d36dbaa7db2b54c3a9d719bf815ca Mon Sep 17 00:00:00 2001 From: tobirohrer Date: Fri, 3 Nov 2023 15:47:46 +0100 Subject: [PATCH 3/4] minor changes --- ...pynb => deep_reinforcement_learning.ipynb} | 0 example_solutions/model_predictive_control.py | 2 +- example_solutions/optimal_control_problem.py | 10 +- ...ement_learning_sample_implementation.ipynb | 221 ------------------ 4 files changed, 5 insertions(+), 228 deletions(-) rename example_solutions/{reinforcement_learning_sample_implementation.ipynb => deep_reinforcement_learning.ipynb} (100%) delete mode 100644 reinforcement_learning_sample_implementation.ipynb diff --git a/example_solutions/reinforcement_learning_sample_implementation.ipynb b/example_solutions/deep_reinforcement_learning.ipynb similarity index 100% rename from example_solutions/reinforcement_learning_sample_implementation.ipynb rename to example_solutions/deep_reinforcement_learning.ipynb diff --git a/example_solutions/model_predictive_control.py b/example_solutions/model_predictive_control.py index e861265..2eafe21 100644 --- a/example_solutions/model_predictive_control.py +++ b/example_solutions/model_predictive_control.py @@ -62,7 +62,7 @@ def normalize_to_minus_one_to_one(x, min_value, max_value): print('baseline cost: ' + 
str(baseline_cost)) print('cost: ' + str(cost)) -print('savings in %: ' + str(cost/baseline_cost)) +print('savings in %: ' + str(1 - cost/baseline_cost)) time = range(len(actions)) diff --git a/example_solutions/optimal_control_problem.py b/example_solutions/optimal_control_problem.py index eac5764..2d06301 100644 --- a/example_solutions/optimal_control_problem.py +++ b/example_solutions/optimal_control_problem.py @@ -4,10 +4,8 @@ from helper import read_data, TEST_INDEX_END, TEST_INDEX_START, BATTERY_CAPACITY, BATTERY_POWER -DELTA_TIME_HOURS = 1 - -def build_optimization_problem(residual_fixed_load, price, soc, battery_power, battery_capacity): +def build_optimization_problem(residual_fixed_load, price, soc, battery_power, battery_capacity, delta_time_hours=1): # model parameter initilization time = range(len(residual_fixed_load)) soc_time = range(len(residual_fixed_load) + 1) @@ -33,7 +31,7 @@ def soc_start_rule(m): m.soc_start = pyo.Constraint(rule=soc_start_rule) def soc_constraint_rule(m, i): - return m.soc[i + 1] == float(100) * DELTA_TIME_HOURS * (m.power[i]) / energy_capacity + m.soc[i] + return m.soc[i + 1] == float(100) * delta_time_hours * (m.power[i]) / energy_capacity + m.soc[i] m.soc_constraints = pyo.Constraint(time, rule=soc_constraint_rule) @@ -58,7 +56,7 @@ def soc_constraint_rule(m, i): battery_power=BATTERY_POWER, battery_capacity=BATTERY_CAPACITY) solver.solve(m, tee=True) - t = [time[i] * DELTA_TIME_HOURS for i in time] + t = [time[i] for i in time] baseline_cost = sum(residual_fixed_load_eval[residual_fixed_load_eval > 0] * price_eval[residual_fixed_load_eval > 0]) augmented_load = residual_fixed_load_eval + np.array([(pyo.value(m.power[i])) for i in time]) @@ -66,7 +64,7 @@ def soc_constraint_rule(m, i): print('baseline cost: ' + str(baseline_cost)) print('cost: ' + str(cost)) - print('savings in %: ' + str(cost/baseline_cost)) + print('savings in %: ' + str(1 - cost/baseline_cost)) fig1 = plt.figure() ax = plt.subplot() diff --git 
a/reinforcement_learning_sample_implementation.ipynb b/reinforcement_learning_sample_implementation.ipynb deleted file mode 100644 index b326323..0000000 --- a/reinforcement_learning_sample_implementation.ipynb +++ /dev/null @@ -1,221 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "import gymnasium\n", - "import os\n", - "import pandas as pd\n", - "from matplotlib import pyplot as plt\n", - "\n", - "from stable_baselines3 import PPO\n", - "from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize\n", - "from stable_baselines3.common.monitor import Monitor\n", - "\n", - "from building_energy_storage_simulation import BuildingSimulation, Environment" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Applying Reiforcement Learning Using Stable Baselines 3\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "RL_PATH = 'rl_example/'\n", - "os.makedirs(RL_PATH, exist_ok=True)\n", - "\n", - "# Create Environment\n", - "sim = BuildingSimulation()\n", - "env = Environment(sim)\n", - "initial_obs, info = env.reset()\n", - "print(initial_obs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Wrap with Monitor() so a log of the training is saved \n", - "env = Monitor(env, filename=RL_PATH)\n", - "# Warp with DummyVecEnc() so the observations and reward can be normalized using VecNormalize()\n", - "env = DummyVecEnv([lambda: env])\n", - "env = VecNormalize(env, norm_obs=True, norm_reward=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Train with PPO :-)\n", - "model = PPO(\"MlpPolicy\", env, verbose=1, gamma=0.95)\n", - "model.learn(total_timesteps=50000)\n", - "# Store the trained Model and environment stats (which are needed as we 
are standardizing the observations and reward using VecNormalize())\n", - "model.save(RL_PATH + 'model')\n", - "env.save(RL_PATH + 'env.pkl')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Evaluation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Plot the training process\n", - "training_log = pd.read_csv(RL_PATH + 'monitor.csv', skiprows=1)\n", - "training_log['r'].plot()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "env.training = False\n", - "\n", - "actions, observations, electricity_consumption, excess_energy, rewards = ([], [], [], [], [])\n", - "done = False\n", - "obs = env.reset()\n", - "while not done:\n", - " action = model.predict(obs, deterministic=True)\n", - " obs, r, done, info = env.step([action[0][0]])\n", - "\n", - " actions.append(action[0][0][0])\n", - " original_reward = env.get_original_reward()[0]\n", - " original_obs = env.get_original_obs()[0]\n", - " observations.append(original_obs)\n", - " electricity_consumption.append(info[0]['electricity_consumption'])\n", - " excess_energy.append(info[0]['electricity_price'])\n", - " rewards.append(r)\n", - " \n", - "trajectory = pd.DataFrame({\n", - " 'action': actions,\n", - " 'observations': observations,\n", - " 'electricity_consumption': electricity_consumption,\n", - " 'electricity_price': excess_energy,\n", - " 'reward': rewards\n", - " }) " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_data = trajectory[0:200]\n", - "observation_df = plot_data['observations'].apply(pd.Series)\n", - "\n", - "plt.rcParams[\"figure.figsize\"] = (16,10)\n", - "\n", - "fig, ax = plt.subplots()\n", - "ax.plot(observation_df[1], label = 'electric load')\n", - "ax.plot(observation_df[5], label = 'solar generation')\n", - "ax.plot(plot_data['electricity_price'], label = 
'electricity_price')\n", - "\n", - "ax1 = ax.twinx()\n", - "ax1.plot(plot_data['action'], label = 'action', color = 'black')\n", - "fig.legend(bbox_to_anchor=[0.5, 0.95], loc = 'center', ncol=5, prop={'size': 16})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Compare to Baseline" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "env.training = False\n", - "\n", - "cost = []\n", - "done = False\n", - "obs = env.reset()\n", - "while not done:\n", - " action = model.predict(obs, deterministic=True)\n", - " obs, r, done, info = env.step([action[0][0]])\n", - " cost.append(info[0]['electricity_consumption'] * info[0]['electricity_price'])\n", - "\n", - "cost = sum(cost)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "env.training = False\n", - "\n", - "baseline_cost = []\n", - "done = False\n", - "obs = env.reset()\n", - "while not done:\n", - " # Always taking noop as action. 
This is the electricity demand if there would be no battery\n", - " action = [0]\n", - " obs, r, done, info = env.step(action)\n", - " baseline_cost.append(info[0]['electricity_consumption'] * info[0]['electricity_price'])\n", - "\n", - "baseline_cost = sum(baseline_cost)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# how much energy did we save by utilizing the battery?\n", - "cost / baseline_cost" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "building", - "language": "python", - "name": "building" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} From 9bf425bd5fbef7e751d51af4ebdfcaa514e621ec Mon Sep 17 00:00:00 2001 From: tobirohrer Date: Fri, 3 Nov 2023 15:55:53 +0100 Subject: [PATCH 4/4] Added more comments --- example_solutions/model_predictive_control.py | 25 +++++++++++-------- example_solutions/optimal_control_problem.py | 3 ++- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/example_solutions/model_predictive_control.py b/example_solutions/model_predictive_control.py index 2eafe21..91b4f5f 100644 --- a/example_solutions/model_predictive_control.py +++ b/example_solutions/model_predictive_control.py @@ -42,18 +42,23 @@ def normalize_to_minus_one_to_one(x, min_value, max_value): residual_load_forecast = load_forecast - generation_forecast soc = obs[0] - instance = build_optimization_problem(residual_fixed_load=residual_load_forecast, - price=price_forecast, - soc=soc / BATTERY_CAPACITY * 100, # Convert SOC due to different SOC definitions - battery_capacity=BATTERY_CAPACITY, - battery_power=BATTERY_POWER) - - solver.solve(instance, tee=True) - action = pyo.value(instance.power[0]) - actions = 
np.append(actions, action) - obs, reward, done, _, info = env.step(normalize_to_minus_one_to_one(action, -1 * BATTERY_POWER, BATTERY_POWER)) + optimization_problem = build_optimization_problem(residual_fixed_load=residual_load_forecast, + price=price_forecast, + soc=soc / BATTERY_CAPACITY * 100, # Convert SOC due to different SOC definitions + battery_capacity=BATTERY_CAPACITY, + battery_power=BATTERY_POWER) + solver.solve(optimization_problem, tee=True) + # Only apply the first action of the optimal solution in each iteration. This is a key concept of MPC. + action = pyo.value(optimization_problem.power[0]) + # Normalize action, as the environment expects normalized actions. + normalized_action = normalize_to_minus_one_to_one(action, -1 * BATTERY_POWER, BATTERY_POWER) + # Apply action to the environment and get new observation aka. state which is used to build the optimal control + # problem of the next time step. + obs, _, done, _, _ = env.step(normalized_action) + residual_loads = np.append(residual_loads, residual_load_forecast[0]) prices = np.append(prices, price_forecast[0]) + actions = np.append(actions, action) t += 1 baseline_cost = sum(residual_loads[residual_loads > 0] * prices[residual_loads > 0]) diff --git a/example_solutions/optimal_control_problem.py b/example_solutions/optimal_control_problem.py index 2d06301..e4be513 100644 --- a/example_solutions/optimal_control_problem.py +++ b/example_solutions/optimal_control_problem.py @@ -6,7 +6,6 @@ def build_optimization_problem(residual_fixed_load, price, soc, battery_power, battery_capacity, delta_time_hours=1): - # model parameter initilization time = range(len(residual_fixed_load)) soc_time = range(len(residual_fixed_load) + 1) max_power_charge = battery_power @@ -21,6 +20,7 @@ def build_optimization_problem(residual_fixed_load, price, soc, battery_power, b m.soc = pyo.Var(soc_time, bounds=(min_soc, max_soc)) def obj_expression(m): + # pyo.log to make the objective expression smooth and therefore 
solvable return sum([price[i] * pyo.log(1 + pyo.exp((m.power[i] + residual_fixed_load[i]))) for i in time]) m.OBJ = pyo.Objective(rule=obj_expression, sense=pyo.minimize) @@ -31,6 +31,7 @@ def soc_start_rule(m): m.soc_start = pyo.Constraint(rule=soc_start_rule) def soc_constraint_rule(m, i): + # Define the system dynamics as constraint return m.soc[i + 1] == float(100) * delta_time_hours * (m.power[i]) / energy_capacity + m.soc[i] m.soc_constraints = pyo.Constraint(time, rule=soc_constraint_rule)