fixing a bunch of tests and removing rllib, rllib will be supported using gymnasium and petting zoo in future
Bam4d · Oct 18, 2023 · 2600b01 · 2600b01
1 parent 88d1be2
commit 2600b01
Show file tree

Hide file tree

Showing 21 changed files with 73 additions and 1,077 deletions.
diff --git a/python/griddly/__init__.py b/python/griddly/__init__.py
@@ -1,4 +1,5 @@
 import os
+
 import yaml
 
 from griddly.gym import GymWrapperFactory

diff --git a/python/griddly/gym.py b/python/griddly/gym.py
@@ -8,8 +8,8 @@
 from gymnasium.envs.registration import register
 from gymnasium.spaces import Discrete, MultiDiscrete
 
-from griddly.loader import GriddlyLoader
 from griddly import gd as gd
+from griddly.loader import GriddlyLoader
 from griddly.spaces.action_space import MultiAgentActionSpace
 from griddly.spaces.observation_space import (
     EntityObservationSpace,
@@ -363,7 +363,7 @@ def enable_history(self, enable: bool = True) -> None:
         self._enable_history = enable
         self.game.enable_history(enable)
 
-    def step( # type: ignore
+    def step(  # type: ignore
         self, action: Union[Action, List[Action]]
     ) -> Tuple[
         Union[List[Observation], Observation],
@@ -380,28 +380,50 @@ def step( # type: ignore
 
         ragged_actions = []
         max_num_actions = 1
+        try:
+            if self.player_count == 1:
+                ragged_actions.append(
+                    np.array(action, dtype=np.int32).reshape(
+                        -1, len(self.action_space_parts)
+                    )
+                )
+                max_num_actions = ragged_actions[0].shape[0]
+            else:
+                for p in range(self.player_count):
+                    a: Union[Action, List[Action]]
+                    if isinstance(action, list):
+                        if action[p] is None:
+                            a = np.zeros(len(self.action_space_parts), dtype=np.int32)
+                        else:
+                            a = action[p]
+                    else:
+                        a = action
+
+                    ragged_actions.append(
+                        np.array(a, dtype=np.int32).reshape(
+                            -1, len(self.action_space_parts)
+                        )
+                    )
 
-        if self.player_count == 1:
-            ragged_actions.append(np.array(action, dtype=np.int32).reshape(-1, len(self.action_space_parts)))
-            max_num_actions = ragged_actions[0].shape[0]
-        else:
-            for p in range(self.player_count):
-                if isinstance(action, list):
-                    ragged_actions.append(np.array(action[p], dtype=np.int32).reshape(-1, len(self.action_space_parts)))
-                else:
-                    ragged_actions.append(np.array(action, dtype=np.int32).reshape(-1, len(self.action_space_parts)))
-
-                if ragged_actions[p].shape[0] > max_num_actions:
-                    max_num_actions = ragged_actions[p].shape[0]
-
-        action_data = np.zeros((self.player_count, max_num_actions, len(self.action_space_parts)), dtype=np.int32)
-
-        for p in range(self.player_count):
-            for i, a in enumerate(ragged_actions[p]):
-                action_data[p, i] = a
+                    if ragged_actions[p].shape[0] > max_num_actions:
+                        max_num_actions = ragged_actions[p].shape[0]
 
+            action_data = np.zeros(
+                (self.player_count, max_num_actions, len(self.action_space_parts)),
+                dtype=np.int32,
+            )
 
-        reward, done, truncated, info = self.game.step_parallel(action_data)
+            for p in range(self.player_count):
+                for i, a in enumerate(ragged_actions[p]):
+                    action_data[p, i] = a
+
+            reward, done, truncated, info = self.game.step_parallel(action_data)
+        except Exception as e:
+            # Chain the cause so the original traceback is preserved.
+            raise ValueError(
+                f"Invalid action {action} for action space {self.action_space}. "
+                f"Example valid action: {self.action_space.sample()}"
+            ) from e
 
         # Compatibility with gymnasium
         if self.player_count == 1:

diff --git a/python/griddly/loader.py b/python/griddly/loader.py
@@ -5,6 +5,7 @@
 
 from griddly import gd
 
+
 class GriddlyLoader:
     def __init__(self) -> None:
         module_path = os.path.dirname(os.path.realpath(__file__))
@@ -43,4 +44,4 @@ def load_string(self, yaml_string: str) -> gd.GDY:
 
     def load_gdy(self, gdy_path: str) -> Dict[str, Any]:
         with open(self.get_full_path(gdy_path)) as gdy_file:
-            return yaml.load(gdy_file, Loader=yaml.SafeLoader)  # type: ignore
+            return yaml.load(gdy_file, Loader=yaml.SafeLoader)  # type: ignore
diff --git a/python/griddly/spaces/action_space.py b/python/griddly/spaces/action_space.py
@@ -3,9 +3,9 @@
 from typing import TYPE_CHECKING, Any, List, Optional, Union
 
 import numpy as np
-from gymnasium.spaces import Discrete, MultiDiscrete, Space
+from gymnasium.spaces import Space
 
-from griddly.typing import Action, ActionSpace
+from griddly.typing import Action
 
 if TYPE_CHECKING:
     from griddly.gym import GymWrapper

diff --git a/python/griddly/util/breakdown.py b/python/griddly/util/breakdown.py
@@ -6,14 +6,15 @@
 import numpy.typing as npt
 import yaml
 
-from griddly.loader import GriddlyLoader
 from griddly import gd
+from griddly.loader import GriddlyLoader
 from griddly.util.vector_visualization import Vector2RGB
 
 
 class TemporaryEnvironment:
     """
-    Because we have to load the game many different times with different configurations, this class makes sure we clean up objects we dont need
+    Because we have to load the game many different times with different configurations,
+    this class makes sure we clean up objects we don't need
     """
 
     def __init__(
@@ -120,7 +121,7 @@ def _populate_common_properties(self) -> None:
             for observer_name, config in object["Observers"].items():
                 self.supported_observers.add(observer_name)
 
-        self.observer_configs: Dict[str, Dict] = { 
+        self.observer_configs: Dict[str, Dict] = {
             "Block2D": {},
             "Sprite2D": {},
             "Vector": {},
@@ -207,12 +208,12 @@ def _populate_levels(self) -> None:
                     if self.observer_configs[observer_name]["TrackAvatar"]:
                         continue
 
-                for l, level in self.levels.items():
-                    env.game.load_level(l)
+                for l_key, level in self.levels.items():
+                    env.game.load_level(l_key)
                     env.game.reset()
                     rendered_level = env.render_rgb()
-                    self.levels[l]["Observers"][observer_name] = rendered_level
-                    self.levels[l]["Size"] = [
+                    self.levels[l_key]["Observers"][observer_name] = rendered_level
+                    self.levels[l_key]["Size"] = [
                         env.game.get_width(),
                         env.game.get_height(),
                     ]
diff --git a/python/griddly/util/environment_generator_generator.py b/python/griddly/util/environment_generator_generator.py
@@ -1,10 +1,12 @@
 import os
-from typing import Optional, List, Any, Dict, Union
+from typing import Any, Dict, List, Optional, Union
+
 import gymnasium as gym
 import numpy as np
 import yaml
+
 from griddly import gd
-from griddly.gym import GymWrapperFactory, GymWrapper
+from griddly.gym import GymWrapper, GymWrapperFactory
 
 
 class EnvironmentGeneratorGenerator:

diff --git a/python/griddly/util/rllib/__init__.py b/python/griddly/util/rllib/__init__.py
diff --git a/python/griddly/util/rllib/callbacks.py b/python/griddly/util/rllib/callbacks.py
diff --git a/python/griddly/util/rllib/environment/__init__.py b/python/griddly/util/rllib/environment/__init__.py