From 8f3506ce93b5c0eed11f4f77b8c64a481ce14d3a Mon Sep 17 00:00:00 2001 From: Nihat Engin Toklu Date: Fri, 30 Aug 2024 18:13:22 +0200 Subject: [PATCH] Add an example for the functional API and a bugfix This commit adds a new example notebook which demonstrates how one can use the functional operators API to solve problems where a solution is expressed via objects. A bugfix is also introduced for the method `make_callable_evaluator` for problems where the dtype is set as object. --- examples/notebooks/Functional_API/README.md | 1 + .../notebooks/Functional_API/func_rl_ga.ipynb | 898 ++++++++++++++++++ src/evotorch/core.py | 5 +- 3 files changed, 903 insertions(+), 1 deletion(-) create mode 100644 examples/notebooks/Functional_API/func_rl_ga.ipynb diff --git a/examples/notebooks/Functional_API/README.md b/examples/notebooks/Functional_API/README.md index d315c6f..697b711 100644 --- a/examples/notebooks/Functional_API/README.md +++ b/examples/notebooks/Functional_API/README.md @@ -7,5 +7,6 @@ Here are the examples demonstrating various features of this functional API: - **[Maintaining a batch of populations using the functional EvoTorch API](batched_searches.ipynb)**: This notebook shows how one can efficiently run multiple searches simultaneously, each with its own population and hyperparameter configuration, by maintaining a batch of populations. - **[Functional genetic algorithm operators](functional_ops.ipynb)**: This notebook shows how one can implement a custom genetic algorithm by combining the genetic algorithm operator implementations provided by the functional API of EvoTorch. - **[Functional operators for multi-objective optimization](multiobj_batched_ops.ipynb)**: This notebook shows how one can use the functional operators of EvoTorch for multi-objective optimization. Additionally, batched optimization capabilities of these operators are demonstrated. +- **[Functional operators for solving problems with non-numeric solutions](func_rl_ga.ipynb)**: This notebook demonstrates how one can use the functional operators of EvoTorch for solving a problem where a solution is not expressed via a fixed-length numeric vector, but via objects (such as lists, dictionaries, etc.). In more detail, this example focuses on a neuro-evolutionary reinforcement learning problem, where each policy is encoded via a dictionary. - **[Solving constrained optimization problems](constrained.ipynb)**: EvoTorch provides batching-friendly constraint penalization functions that can be used with both the object-oriented API and the functional API. In addition, these constraint penalization functions can be used with gradient-based optimization. This notebook demonstrates these features. - **[Solving reinforcement learning tasks using functional evolutionary algorithms](problem.ipynb)**: The functional evolutionary algorithm implementations of EvoTorch can be used to solve problems that are expressed using the object-oriented core API of EvoTorch. To demonstrate this, this notebook instantiates a `GymNE` problem for the reinforcement learning task "CartPole-v1", and solves it using the functional `pgpe` implementation.
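To make the README entry above concrete before the full notebook diff below: in the new example, a solution is a dictionary that maps each parameter name of the policy network to a list of random seeds, and a parameter tensor is decoded by summing seeded Gaussian noise. The following is a minimal sketch of that encoding; the helper name `decode_param` and the fixed `mutation_power` are illustrative simplifications (the notebook's `make_tensor_from_seeds` additionally decays the mutation power per seed), not part of the patch.

```python
# Illustrative sketch of the dict-of-seeds solution encoding used in func_rl_ga.ipynb.
# `decode_param` is a simplified stand-in for the notebook's `make_tensor_from_seeds`.
import torch
from numpy.random import RandomState


def decode_param(like: torch.Tensor, seeds: list, mutation_power: float = 0.5) -> torch.Tensor:
    # Sum one Gaussian noise tensor per seed; each seed reproduces its noise deterministically.
    result = torch.zeros_like(like)
    for seed in seeds:
        noise = RandomState(seed).randn(*like.shape)
        result += mutation_power * torch.as_tensor(noise, dtype=like.dtype)
    return result


# A solution: one list of seeds per named parameter of the policy network.
solution = {
    "0.weight": [12345],
    "0.bias": [67890],
    "2.weight": [11111, 22222],  # two seeds: the initial seed plus one mutation
    "2.bias": [33333],
}

# Decode one parameter tensor (the shape here is arbitrary, for illustration only).
weight = decode_param(torch.zeros(8, 27), solution["0.weight"])
print(weight.shape)  # torch.Size([8, 27])
```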
diff --git a/examples/notebooks/Functional_API/func_rl_ga.ipynb b/examples/notebooks/Functional_API/func_rl_ga.ipynb new file mode 100644 index 0000000..73fd326 --- /dev/null +++ b/examples/notebooks/Functional_API/func_rl_ga.ipynb @@ -0,0 +1,898 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f9dc9f5e-802f-4305-9919-bd2202f8a63b", + "metadata": {}, + "source": [ + "# Evolving objects using the functional operators API of EvoTorch\n", + "\n", + "In this notebook, we show how to use the functional operators API of EvoTorch for tackling a problem with non-numeric solutions.\n", + "\n", + "In the problem we consider, the goal is to evolve parameter tensors of a feed-forward neural network to make a simulated `Ant-v4` MuJoCo robot walk forward.\n", + "The feed-forward neural network policy has the following modules:\n", + "\n", + "- module 0: linear transformation (`torch.nn.Linear`) with a **weight** matrix and with a **bias** vector\n", + "- module 1: tanh (`torch.nn.Tanh`)\n", + "- module 2: linear transformation (`torch.nn.Linear`) with a **weight** matrix and with a **bias** vector\n", + "\n", + "In this problem, instead of a fixed-length vector consisting of real numbers, a solution is represented by a dictionary structured like this:\n", + "\n", + "```\n", + "{\n", + " \"0.weight\": [ ... list of seeds ... ],\n", + " \"0.bias\": [ ... list of seeds ... ],\n", + " \"2.weight\": [ ... list of seeds ... ],\n", + " \"2.bias\": [ ... list of seeds ... ],\n", + "}\n", + "```\n", + "\n", + "where each key is a name referring to a parameter tensor. Associated with each key is a list of integers (integers being random seeds). When decoding a solution, each parameter tensor (e.g. `\"0.weight\"`) is constructed by sampling Gaussian noise using each seed, and then by summing those noise tensors (as was done in `[1]` and `[2]`).\n", + "\n", + "**Note 1:** Although this example is inspired by the studies `[1]` and `[2]`, it is not a faithful implementation of any of them. Instead, this notebook focuses on demonstrating various features of the functional operators API of EvoTorch.\n", + "\n", + "**Note 2:** For the sake of simplicity, the action space of `Ant-v4` is binned. With this simplification and with its default hyperparameters, the evolutionary algorithm in this example is able to find gaits for the ant robot with a relatively small population size, although the evolved gaits will not be very efficient (i.e. non-competitive cumulative rewards).\n", + "\n", + "---\n", + "\n", + "`[1]` Felipe Petroski Such, Vashisht Madhavan, Edoardo Conti, Joel Lehman, Kenneth O. Stanley, Jeff Clune (2017). \"Deep neuroevolution: Genetic algorithms are a competitive alternative for training deep neural networks for reinforcement learning.\" arXiv preprint arXiv:1712.06567.\n", + "\n", + "`[2]` Risi, Sebastian, and Kenneth O. Stanley (2019). \"Deep neuroevolution of recurrent and discrete world models.\" Proceedings of the Genetic and Evolutionary Computation Conference.\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "8df623bd-3cc6-44d8-888f-fe16460ffa9a", + "metadata": {}, + "source": [ + "## Summary of the evolutionary algorithm\n", + "\n", + "We implement a simple, elitist genetic algorithm with tournament selection, cross-over, and mutation operators. 
The main ideas of this genetic algorithm are as follows.\n", + "\n", + "**Generation of a new solution:**\n", + "- Make a new dictionary.\n", + "- Associated with each key (parameter name) within the dictionary, make a single-element list of seeds, the seed within it being a random integer.\n", + "\n", + "**Cross-over between two solutions:**\n", + "- Make two child solutions (dictionaries).\n", + "- For each key (parameter name):\n", + " - Sample a real number $p$ between 0 and 1.\n", + " - If $p < 0.5$, the first child receives its list of seeds from the first parent, and the second child receives its list of seeds from the second parent.\n", + " - Otherwise, the first child receives its list of seeds from the second parent, and the second child receives its list of seeds from the first parent.\n", + "\n", + "**Mutation of an existing solution:**\n", + "- Pick a key (parameter name) within the solution (dictionary).\n", + "- Randomly sample a new integer, and add this integer into the list of seeds associated with the picked key." + ] + }, + { + "cell_type": "markdown", + "id": "510e620e-a815-48c8-901b-7eeba7781786", + "metadata": {}, + "source": [ + "## Implementation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a2b0b8b-7bc6-49e3-ac27-21aba81cb2df", + "metadata": {}, + "outputs": [], + "source": [ + "from evotorch import Problem, Solution\n", + "from evotorch.tools import make_tensor, ObjectArray\n", + "import evotorch.operators.functional as func_ops\n", + "\n", + "import gymnasium as gym\n", + "import numpy as np\n", + "\n", + "import torch\n", + "from torch import nn\n", + "from torch.func import functional_call\n", + "\n", + "from typing import Iterable, Mapping, Optional, Union\n", + "import random\n", + "import os\n", + "from datetime import datetime\n", + "import pickle" + ] + }, + { + "cell_type": "markdown", + "id": "e0b522cd-4ce1-4fc5-a0a7-f066bb1a2a04", + "metadata": {}, + "source": [ + "The function below takes a series of seeds, and makes a tensor of real numbers out of them.\n", + "We will use this function when decoding a solution." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "210a3488-9370-4a87-bf8f-46f059379243", + "metadata": {}, + "outputs": [], + "source": [ + "def make_tensor_from_seeds(\n", + " like: torch.Tensor,\n", + " seeds: Iterable,\n", + " *,\n", + " mutation_power: float,\n", + " mutation_decay: float,\n", + " min_mutation_power: float,\n", + ") -> torch.Tensor:\n", + " \"\"\"\n", + " Take a series of seeds and compute a tensor out of them.\n", + "\n", + " Args:\n", + " like: A source tensor. The resulting tensor will have the same shape,\n", + " dtype, and device as this source tensor.\n", + " seeds: An iterable in which each item is an integer, each integer\n", + " being a random seed.\n", + " mutation_power: A multiplier for the Gaussian noise generated out of\n", + " a random seed.\n", + " mutation_decay: For each seed, the mutation power will be multiplied\n", + " by this factor. 
For example, if this multiplier is 0.9, the power\n", + " of the mutation will decrease with each successive seed, since that power\n", + " is multiplied by 0.9 once more for each additional seed.\n", + " min_mutation_power: To prevent the mutation power from getting too\n", + " close to 0, provide a lower bound for the multiplier via this argument.\n", + " Returns:\n", + " The tensor generated from the given seeds.\n", + " \"\"\"\n", + " from numpy.random import RandomState\n", + "\n", + " result = torch.zeros_like(like)\n", + " for i_seed, seed in enumerate(seeds):\n", + " multiplier = max(mutation_power * (mutation_decay ** i_seed), min_mutation_power)\n", + " result += (\n", + " multiplier * torch.as_tensor(RandomState(seed).randn(*(like.shape)), dtype=like.dtype, device=like.device)\n", + " )\n", + "\n", + " return result" + ] + }, + { + "cell_type": "markdown", + "id": "7cdb5a99-7372-43a3-a7a1-07ec892c6192", + "metadata": {}, + "source": [ + "Helper function to generate a random seed integer:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe325334-8f42-41d8-98bb-a8db25980da4", + "metadata": {}, + "outputs": [], + "source": [ + "def sample_seed() -> int:\n", + " return random.randint(0, (2 ** 32) - 1)" + ] + }, + { + "cell_type": "markdown", + "id": "5808a486-5178-4409-9ba2-b0a34bf92dff", + "metadata": {}, + "source": [ + "**Observation normalization.**\n", + "Below, we have helper functions that will generate observation data for the reinforcement learning environment at hand.\n", + "The observation data will be used for normalizing the observations before passing them to the policy neural network." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06b496a8-9874-44d0-859f-b67086f62474", + "metadata": {}, + "outputs": [], + "source": [ + "def env_name_to_file_name(env_name: str) -> str:\n", + " \"\"\"\n", + " Convert the gymnasium environment ID to a more file-name-friendly counterpart.\n", + "\n", + " The character ':' in the input string will be replaced with '__colon__'.\n", + " Similarly, the character '/' in the input string will be replaced with '__slash__'.\n", + "\n", + " Args:\n", + " env_name: gymnasium environment ID\n", + " Returns:\n", + " File-name-friendly counterpart of the input string.\n", + " \"\"\"\n", + " result = env_name\n", + " result = result.replace(\":\", \"__colon__\")\n", + " result = result.replace(\"/\", \"__slash__\")\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "abf2e4c9-abe7-4a8b-b844-a07729754458", + "metadata": {}, + "outputs": [], + "source": [ + "def create_obs_data(\n", + " *,\n", + " env_name: str,\n", + " num_timesteps: int,\n", + " report_interval: Union[int, float] = 5,\n", + " seed: int = 0,\n", + ") -> tuple:\n", + " \"\"\"\n", + " Create observation normalization data with the help of random actions.\n", + "\n", + " This function creates a gymnasium environment from the given `env_name`.\n", + " Then, it keeps sending random actions to this environment, and collects stats from the observations.\n", + "\n", + " Args:\n", + " env_name: ID of the gymnasium environment\n", + " num_timesteps: For how many timesteps will the function operate on the environment\n", + " report_interval: Time interval, in seconds, for reporting the status\n", + " seed: A seed that will be used for regulating the randomness of both the environment\n", + " and of the random actions.\n", + " Returns:\n", + " A tuple of the form `(mean, stdev)`, where `mean` is the elementwise mean of the 
observation vectors,\n", + " and `stdev` is the elementwise standard deviation of the observation vectors.\n", + " \"\"\"\n", + " print(\"Creating observation data for\", env_name)\n", + "\n", + " class accumulated:\n", + " sum: Optional[np.ndarray] = None\n", + " sum_of_squares: Optional[np.ndarray] = None\n", + " count: int = 0\n", + "\n", + " def accumulate(obs: np.ndarray):\n", + " if accumulated.sum is None:\n", + " accumulated.sum = obs.copy()\n", + " else:\n", + " accumulated.sum += obs\n", + "\n", + " squared = obs ** 2\n", + " if accumulated.sum_of_squares is None:\n", + " accumulated.sum_of_squares = squared\n", + " else:\n", + " accumulated.sum_of_squares += squared\n", + "\n", + " accumulated.count += 1\n", + "\n", + " rndgen = np.random.RandomState(seed)\n", + "\n", + " env = gym.make(env_name)\n", + " assert isinstance(env.action_space, gym.spaces.Box), \"Can only work with Box action spaces\"\n", + "\n", + " def reset_env() -> tuple:\n", + " return env.reset(seed=rndgen.randint(2 ** 32))\n", + "\n", + " action_gap = env.action_space.high - env.action_space.low\n", + " def sample_action() -> np.ndarray:\n", + " return (rndgen.rand(*(env.action_space.shape)) * action_gap) + env.action_space.low\n", + "\n", + " observation, _ = reset_env()\n", + " accumulate(observation)\n", + "\n", + " last_report_time = datetime.now()\n", + "\n", + " for t in range(num_timesteps):\n", + " action = sample_action()\n", + " observation, _, terminated, truncated, _ = env.step(action)\n", + " accumulate(observation)\n", + "\n", + " done = terminated | truncated\n", + " if done:\n", + " observation, info = reset_env()\n", + " accumulate(observation)\n", + "\n", + " tnow = datetime.now()\n", + " if (tnow - last_report_time).total_seconds() > report_interval:\n", + " print(\"Number of timesteps:\", t, \"/\", num_timesteps)\n", + " last_report_time = tnow\n", + "\n", + " E_x = accumulated.sum / accumulated.count\n", + " E_x2 = accumulated.sum_of_squares / accumulated.count\n", + "\n", + " mean = E_x\n", + " variance = np.maximum(E_x2 - ((E_x) ** 2), 1e-2)\n", + " stdev = np.sqrt(variance)\n", + "\n", + " print(\"Done.\")\n", + " \n", + " return mean, stdev" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3fde9767-b959-450b-896b-6a5e129f329f", + "metadata": {}, + "outputs": [], + "source": [ + "def get_obs_data(env_name: str, num_timesteps: int = 50000, seed: int = 0) -> tuple:\n", + " \"\"\"\n", + " Generate observation normalization data for the gymnasium environment whose name is given.\n", + "\n", + " If such normalization data was already generated and saved into a pickle file, that pickle file will be loaded.\n", + " Otherwise, new normalization data will be generated and saved into a new pickle file.\n", + "\n", + " Args:\n", + " env_name: ID of the gymnasium environment\n", + " num_timesteps: For how many timesteps will the observation collector operate on the environment\n", + " seed: A seed that will be used for regulating the randomness of both the environment\n", + " and of the random actions.\n", + " Returns:\n", + " A tuple of the form `(mean, stdev)`, where `mean` is the elementwise mean of the observation vectors,\n", + " and `stdev` is the elementwise standard deviation of the observation vectors.\n", + " \"\"\"\n", + " num_timesteps = int(num_timesteps)\n", + " envfname = env_name_to_file_name(env_name)\n", + " fname = f\"obs_seed{seed}_t{num_timesteps}_{envfname}.pickle\"\n", + " if os.path.isfile(fname):\n", + " with open(fname, \"rb\") as f:\n", + " return 
pickle.load(f)\n", + " else:\n", + " obsdata = create_obs_data(env_name=env_name, num_timesteps=num_timesteps, seed=seed)\n", + " with open(fname, \"wb\") as f:\n", + " pickle.dump(obsdata, f)\n", + " return obsdata" + ] + }, + { + "cell_type": "markdown", + "id": "8f782f05-93b6-4d80-9679-e3b6028428e3", + "metadata": {}, + "source": [ + "**Problem definition.**\n", + "Below is the problem definition for the considered reinforcement learning task.\n", + "We are defining the problem as a subclass of `evotorch.Problem`, so that we will be able to use the ray-based parallelization capabilities of the base `Problem` class." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8216b77-0f64-4f9d-8a3d-44cfe3deaec5", + "metadata": {}, + "outputs": [], + "source": [ + "class MyRLProblem(Problem):\n", + " def __init__(\n", + " self,\n", + " *,\n", + " env_name: str,\n", + " obs_mean: Optional[np.ndarray] = None,\n", + " obs_stdev: Optional[np.ndarray] = None,\n", + " mutation_power: float = 0.5,\n", + " mutation_decay: float = 0.9,\n", + " min_mutation_power: float = 0.05,\n", + " hidden_sizes: tuple = (64,),\n", + " bin: Optional[float] = 0.2,\n", + " num_episodes: int = 4,\n", + " episode_length: Optional[int] = None,\n", + " decrease_rewards_by: Optional[float] = 1.0,\n", + " num_actors: Optional[Union[int, str]] = \"max\"\n", + " ):\n", + " super().__init__(\n", + " objective_sense=\"max\",\n", + " dtype=object,\n", + " num_actors=num_actors,\n", + " )\n", + " self._env_name = str(env_name)\n", + " self._env = None\n", + " self._hidden_sizes = [int(hidden_size) for hidden_size in hidden_sizes]\n", + " self._policy = None\n", + "\n", + " self._obs_mean = None if obs_mean is None else np.asarray(obs_mean).astype(\"float32\")\n", + " self._obs_stdev = None if obs_stdev is None else np.asarray(obs_stdev).astype(\"float32\")\n", + " self._mutation_power = float(mutation_power)\n", + " self._mutation_decay = float(mutation_decay)\n", + " self._min_mutation_power = float(min_mutation_power)\n", + " self._bin = None if bin is None else float(bin)\n", + " self._num_episodes = int(num_episodes)\n", + " self._episode_length = None if episode_length is None else int(episode_length)\n", + " self._decrease_rewards_by = None if decrease_rewards_by is None else float(decrease_rewards_by)\n", + "\n", + " def _get_policy(self) -> nn.Module:\n", + " env = self._get_env()\n", + "\n", + " if not isinstance(env.observation_space, gym.spaces.Box):\n", + " raise TypeError(\n", + " f\"Only Box-typed environments are supported. 
Encountered observation space is {env.observation_space}\"\n", + " )\n", + "\n", + " [obslen] = env.observation_space.shape\n", + " if isinstance(env.action_space, gym.spaces.Box):\n", + " [actlen] = env.action_space.shape\n", + " elif isinstance(env.action_space, gym.spaces.Discrete):\n", + " actlen = env.action_space.n\n", + " else:\n", + " raise TypeError(f\"Unrecognized action space: {env.action_space}\")\n", + "\n", + " all_sizes = [obslen]\n", + " all_sizes.extend(self._hidden_sizes)\n", + " all_sizes.append(actlen)\n", + "\n", + " last_size_index = len(all_sizes) - 1\n", + "\n", + " modules = []\n", + " for i in range(1, len(all_sizes)):\n", + " modules.append(nn.Linear(all_sizes[i - 1], all_sizes[i]))\n", + " if i < last_size_index:\n", + " modules.append(nn.Tanh())\n", + "\n", + " return nn.Sequential(*modules)\n", + "\n", + " def _get_env(self, visualize: bool = False) -> gym.Env:\n", + " if visualize:\n", + " return gym.make(self._env_name, render_mode=\"human\")\n", + "\n", + " if self._env is None:\n", + " self._env = gym.make(self._env_name)\n", + " return self._env\n", + "\n", + " def _generate_single_solution(self) -> dict:\n", + " policy = self._get_policy()\n", + " result = {}\n", + " for param_name, params in policy.named_parameters():\n", + " result[param_name] = [sample_seed()]\n", + " return result\n", + "\n", + " def generate_values(self, n: int) -> ObjectArray:\n", + " return make_tensor([self._generate_single_solution() for _ in range(n)], dtype=object)\n", + "\n", + " def run_solution(\n", + " self,\n", + " x: Union[Mapping, Solution],\n", + " *,\n", + " num_episodes: Optional[int] = None,\n", + " visualize: bool = False\n", + " ) -> float:\n", + " if num_episodes is None:\n", + " num_episodes = self._num_episodes\n", + "\n", + " if isinstance(x, Mapping):\n", + " sln = x\n", + " elif isinstance(x, Solution):\n", + " sln = x.values\n", + " else:\n", + " raise TypeError(f\"Expected a Mapping or a Solution, but got {repr(x)}\")\n", + "\n", + " policy = self._get_policy()\n", + "\n", + " params = {}\n", + " for param_name, param_values in policy.named_parameters():\n", + " param_seeds = sln[param_name]\n", + " params[param_name] = make_tensor_from_seeds(\n", + " param_values,\n", + " param_seeds,\n", + " mutation_power=self._mutation_power,\n", + " mutation_decay=self._mutation_decay,\n", + " min_mutation_power=self._min_mutation_power,\n", + " )\n", + "\n", + " env = self._get_env(visualize=visualize)\n", + "\n", + " def use_policy(policy_input: np.ndarray) -> Union[int, np.ndarray]:\n", + " if (self._obs_mean is not None) and (self._obs_stdev is not None):\n", + " policy_input = policy_input - self._obs_mean\n", + " policy_input = policy_input / self._obs_stdev\n", + "\n", + " result = functional_call(policy, params, torch.as_tensor(policy_input, dtype=torch.float32)).numpy()\n", + "\n", + " if isinstance(env.action_space, gym.spaces.Box):\n", + " if self._bin is not None:\n", + " result = np.sign(result) * self._bin\n", + " result = np.clip(result, env.action_space.low, env.action_space.high)\n", + " elif isinstance(env.action_space, gym.spaces.Discrete):\n", + " result = int(np.argmax(result))\n", + " else:\n", + " raise TypeError(f\"Unrecognized action space: {repr(env.action_space)}\")\n", + "\n", + " return result\n", + "\n", + " cumulative_reward = 0.0\n", + "\n", + " for _ in range(num_episodes):\n", + " timestep = 0\n", + " observation, info = env.reset()\n", + " while True:\n", + " action = use_policy(observation)\n", + " observation, reward, done1, done2, _ = 
env.step(action)\n", + " timestep += 1\n", + " if (self._decrease_rewards_by is not None) and (not visualize):\n", + " reward = reward - self._decrease_rewards_by\n", + " cumulative_reward += reward\n", + " if (\n", + " done1\n", + " or done2\n", + " or (\n", + " (not visualize)\n", + " and (self._episode_length is not None)\n", + " and (timestep >= self._episode_length)\n", + " )\n", + " ):\n", + " break\n", + "\n", + " return cumulative_reward / num_episodes\n", + "\n", + " def visualize(self, x: Union[Solution, Mapping]) -> float:\n", + " return self.run_solution(x, num_episodes=1, visualize=True)\n", + " \n", + " def _evaluate(self, x: Solution):\n", + " x.set_evaluation(self.run_solution(x))" + ] + }, + { + "cell_type": "markdown", + "id": "394066c3-6eeb-44d1-baa0-fcae4e2101ef", + "metadata": {}, + "source": [ + "We now define our mutation and cross-over operators, via the functions `mutate` and `cross_over`.\n", + "Since the solutions are expressed via dictionary-like objects, we use `Mapping` for type annotations." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "676714c9-6288-459f-aeef-411849a2af2b", + "metadata": {}, + "outputs": [], + "source": [ + "def mutate(solution: Mapping) -> Mapping:\n", + " from evotorch.tools import as_immutable\n", + "\n", + " solution = {k: list(v) for k, v in solution.items()}\n", + "\n", + " keys = list(solution.keys())\n", + " chosen_key = random.choice(keys)\n", + " solution[chosen_key].append(sample_seed())\n", + "\n", + " return as_immutable(solution)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62cf3114-83b0-48d3-be2e-3dd8387d8c32", + "metadata": {}, + "outputs": [], + "source": [ + "def cross_over(parent1: Mapping, parent2: Mapping) -> tuple:\n", + " from evotorch.tools import as_immutable\n", + "\n", + " keys = list(parent1.keys())\n", + "\n", + " child1 = {}\n", + " child2 = {}\n", + " for k in keys:\n", + " p = random.random()\n", + " if p < 0.5:\n", + " child1[k] = parent1[k]\n", + " child2[k] = parent2[k]\n", + " else:\n", + " child1[k] = parent2[k]\n", + " child2[k] = parent1[k]\n", + "\n", + " return as_immutable(child1), as_immutable(child2)" + ] + }, + { + "cell_type": "markdown", + "id": "51178388-1db5-4864-8012-607558f7a151", + "metadata": {}, + "source": [ + "ID of the considered reinforcement learning task:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68f3b4f1-9120-4d0c-a314-cff8a6a1ad93", + "metadata": {}, + "outputs": [], + "source": [ + "ENV_NAME = \"Ant-v4\"" + ] + }, + { + "cell_type": "markdown", + "id": "f2f43b33-d6a1-4fd0-97bb-750a8e3c6667", + "metadata": {}, + "source": [ + "Generate or load observation data for the considered reinforcement learning environment:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35d310cd-fca4-4887-8dd8-0b586efd7e48", + "metadata": {}, + "outputs": [], + "source": [ + "env_obs_mean, env_obs_stdev = get_obs_data(ENV_NAME)\n", + "env_obs_mean, env_obs_stdev" + ] + }, + { + "cell_type": "markdown", + "id": "6e6ef3aa-1f8f-4ee1-807c-0770dbb0312f", + "metadata": {}, + "source": [ + "Instantiate the problem object:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c89d035f-c24f-45cb-ae3f-af2ffd49377d", + "metadata": {}, + "outputs": [], + "source": [ + "problem = MyRLProblem(\n", + " env_name=ENV_NAME,\n", + " decrease_rewards_by=1.0,\n", + " episode_length=250,\n", + " bin=0.15,\n", + " obs_mean=env_obs_mean,\n", + " obs_stdev=env_obs_stdev,\n", + ")\n", + "\n", + "problem" 
+ ] + }, + { + "cell_type": "markdown", + "id": "ff43c026-6588-4b38-841c-677bd4faf0e5", + "metadata": {}, + "source": [ + "Out of the instantiated problem object, we make a callable evaluator named `f`.\n", + "The resulting object `f` can be used as a fitness function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bc9c6588-535a-4e3e-90f4-e994a730068b", + "metadata": {}, + "outputs": [], + "source": [ + "f = problem.make_callable_evaluator()\n", + "f" + ] + }, + { + "cell_type": "markdown", + "id": "78b7f831-1202-44d3-88de-dbad81f1d7df", + "metadata": {}, + "source": [ + "Helper function for converting a real number to a string.\n", + "We will use this while reporting the status of the evolution." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fbd919a7-ca9d-4c13-90ef-d6cddb23faef", + "metadata": {}, + "outputs": [], + "source": [ + "def number_to_str(x) -> str:\n", + " return \"%.2f\" % float(x)" + ] + }, + { + "cell_type": "markdown", + "id": "5f1cc2b1-7d2d-44d7-8741-e523c2a01050", + "metadata": {}, + "source": [ + "Hyperparameters and constants for the evolutionary algorithm:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7c2b6eb-d1d4-4b54-a3db-7125dcb9725c", + "metadata": {}, + "outputs": [], + "source": [ + "popsize = 16\n", + "tournament_size = 4\n", + "objective_sense = problem.objective_sense\n", + "num_generations = 100" + ] + }, + { + "cell_type": "markdown", + "id": "ed62ae2d-87f5-4e5a-8dd8-57a90254dbd2", + "metadata": {}, + "source": [ + "We now prepare the initial population.\n", + "When we are dealing with non-numeric solutions, a population is represented via `evotorch.tools.ObjectArray`, instead of `torch.Tensor`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f80777a0-8b9b-492f-8306-67f8913a3596", + "metadata": {}, + "outputs": [], + "source": [ + "population = problem.generate_values(popsize)\n", + "population" + ] + }, + { + "cell_type": "markdown", + "id": "b513cd3d-01b5-4b3f-ad2d-74631f443a65", + "metadata": {}, + "source": [ + "Evaluate the fitnesses of the solutions within the initial population:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6d075e7-27be-4019-a783-d0c024fa6cd0", + "metadata": {}, + "outputs": [], + "source": [ + "evals = f(population)\n", + "evals" + ] + }, + { + "cell_type": "markdown", + "id": "128ab3ce-30f8-43b3-b966-1a7c0a5099df", + "metadata": {}, + "source": [ + "Main loop of the evolutionary search:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c78a7d4f-9c12-4bd5-977e-668acb44af79", + "metadata": {}, + "outputs": [], + "source": [ + "for generation in range(1, 1 + num_generations):\n", + " t_begin = datetime.now()\n", + "\n", + " # Apply tournament selection on the population\n", + " parent1_indices, parent2_indices = func_ops.tournament(\n", + " population,\n", + " evals,\n", + " tournament_size=tournament_size,\n", + " num_tournaments=popsize,\n", + " split_results=True,\n", + " return_indices=True,\n", + " objective_sense=objective_sense,\n", + " )\n", + "\n", + " # The results of the tournament selection are stored within the integer\n", + " # tensors `parent1_indices` and `parent2_indices`.\n", + " # The pairs of solutions for the cross-over operator are:\n", + " # - `population[parent1_indices[0]]` and `population[parent2_indices[0]]`,\n", + " # - `population[parent1_indices[1]]` and `population[parent2_indices[1]]`,\n", + " # - `population[parent1_indices[2]]` and 
`population[parent2_indices[2]]`,\n", + " # - and so on...\n", + " num_pairs = len(parent1_indices)\n", + " children = []\n", + " for i in range(num_pairs):\n", + " parent1_index = int(parent1_indices[i])\n", + " parent2_index = int(parent2_indices[i])\n", + " child1, child2 = cross_over(population[parent1_index], population[parent2_index])\n", + " child1 = mutate(child1)\n", + " child2 = mutate(child2)\n", + " children.extend([child1, child2])\n", + "\n", + " # With the help of the function `evotorch.tools.make_tensor(...)`,\n", + " # we convert the list of child solutions to an ObjectArray, so that\n", + " # `children` can be treated as a population of solutions.\n", + " children = make_tensor(children, dtype=object)\n", + "\n", + " # Combine the original population with the population of children,\n", + " # forming an extended population.\n", + " extended_population = func_ops.combine(population, children)\n", + "\n", + " # Evaluate all the solutions within the extended population.\n", + " extended_evals = f(extended_population)\n", + "\n", + " # Take the best `popsize` number of solutions from the extended population.\n", + " population, evals = func_ops.take_best(\n", + " extended_population, extended_evals, popsize, objective_sense=objective_sense\n", + " )\n", + "\n", + " t_end = datetime.now()\n", + " time_taken = (t_end - t_begin).total_seconds()\n", + "\n", + " # Report the status of the evolutionary search.\n", + " print(\n", + " \"Generation:\", generation,\n", + " \" Mean eval:\", number_to_str(evals.mean()),\n", + " \" Pop best:\", number_to_str(evals.max()),\n", + " \" Time:\", number_to_str(time_taken)\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "57a2765d-df97-4a48-a593-323cfff07116", + "metadata": {}, + "source": [ + "Take the index of the best solution within the last population:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a6a6376-f296-498d-8ff0-d039d6c67099", + "metadata": {}, + "outputs": [], + "source": [ + "best_index = torch.argmax(evals)\n", + "best_index" + ] + }, + { + "cell_type": "markdown", + "id": "76691a35-b997-49d5-9585-0ecf850608fc", + "metadata": {}, + "source": [ + "Take the best solution within the last population:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2f773b5-3b46-4046-828a-7ca60fd4be5a", + "metadata": {}, + "outputs": [], + "source": [ + "best_params = population[best_index]\n", + "best_params" + ] + }, + { + "cell_type": "markdown", + "id": "f63d773f-3059-462f-808a-fddf4ae721dc", + "metadata": {}, + "source": [ + "Visualize the gait of the population's best solution:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe415cc7-b08b-4ffa-9548-78c3e890bfff", + "metadata": {}, + "outputs": [], + "source": [ + "problem.visualize(best_params)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/evotorch/core.py b/src/evotorch/core.py index 5acb619..479ab4f 100644 --- a/src/evotorch/core.py +++ b/src/evotorch/core.py @@ -5246,4 +5246,7 @@ def __call__(self, values: ObjectArray) -> torch.Tensor: "The positional argument `values` was expected as an 
`ObjectArray`." f" However, an object of this type was encountered: {type(values)}." ) - return self._prepare_evaluated_solution_batch(values).evals.as_subclass(torch.Tensor) + result = self._prepare_evaluated_solution_batch(values).evals.as_subclass(torch.Tensor) + if len(self._problem.senses) == 1: + result = result.reshape(-1) + return result
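A usage note on the `core.py` fix above: for a single-objective problem (such as the one in the new notebook), the callable evaluator now yields a flat, 1-D tensor of fitnesses, which is the shape consumed by functional operators such as `tournament` and `take_best`. Below is a minimal sketch of that expectation, reusing the notebook's own objects; the explicit shape check is illustrative and not part of the patch.

```python
# Minimal sketch (assumes the notebook's MyRLProblem, env_obs_mean, and env_obs_stdev
# are already defined): with the fix, a single-objective Problem's callable
# evaluator returns a 1-D tensor containing one fitness per solution.
problem = MyRLProblem(env_name="Ant-v4", obs_mean=env_obs_mean, obs_stdev=env_obs_stdev)
f = problem.make_callable_evaluator()

population = problem.generate_values(8)  # ObjectArray of 8 dict-encoded solutions
evals = f(population)                    # evaluate the population

assert evals.shape == (8,)  # flat fitness vector, as expected by tournament()/take_best()
```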