From dd3d9624ad07a34097d149050bc9a02e201e67e1 Mon Sep 17 00:00:00 2001
From: MarkHaoxiang <mark.haoxiang@gmail.com>
Date: Sat, 21 Dec 2024 21:23:39 +0000
Subject: [PATCH 1/6] Pyproject and pixi. Initial scaffolding on pettingzoo

---
 .gitattributes      |  2 ++
 .gitignore          |  6 +++-
 pyproject.toml      | 41 +++++++++++++++++++++
 rware/pettingzoo.py | 86 +++++++++++++++++++++++++++++++++++++++++++++
 setup.py            |  2 +-
 5 files changed, 135 insertions(+), 2 deletions(-)
 create mode 100644 .gitattributes
 create mode 100644 pyproject.toml
 create mode 100644 rware/pettingzoo.py

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..8f61a8e
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+# SCM syntax highlighting
+pixi.lock linguist-language=YAML linguist-generated=true
diff --git a/.gitignore b/.gitignore
index c4b8a47..4620d00 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+pixi.lock
 
 # Created by https://www.gitignore.io/api/linux,python,windows,pycharm+all,visualstudiocode
 # Edit at https://www.gitignore.io/?templates=linux,python,windows,pycharm+all,visualstudiocode
@@ -245,4 +246,7 @@ $RECYCLE.BIN/
 # Windows shortcuts
 *.lnk
 
-# End of https://www.gitignore.io/api/linux,python,windows,pycharm+all,visualstudiocode
\ No newline at end of file
+# End of https://www.gitignore.io/api/linux,python,windows,pycharm+all,visualstudiocode
+# pixi environments
+.pixi
+*.egg-info
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..e5b109c
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,41 @@
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "rware"
+version = "2.0.0"
+description = "Multi-Robot Warehouse environment for reinforcement learning"
+readme = { content-type = "text/markdown", file = "README.md" }
+maintainers = [{ name = "Filippos Christianos" }]
+classifiers = [
+    "Intended Audience :: Developers",
+    "Programming Language :: Python :: 3.7",
+]
+requires-python = ">=3.7"
+urls = { github = "https://github.com/semitable/robotic-warehouse" }
+dependencies = ["numpy", "gymnasium", "pyglet<2", "networkx"]
+
+[project.optional-dependencies]
+test = ["pytest"]
+pettingzoo = ["pettingzoo"]
+
+[tool.setuptools.packages.find]
+exclude = ["contrib", "docs", "tests"]
+
+# pixi
+[tool.pixi.project]
+channels = ["conda-forge"]
+platforms = ["linux-64"]
+preview = ["pixi-build"]
+
+# Environments
+[tool.pixi.environments]
+default = { solve-group = "default" }
+test = { features = ["test", "pettingzoo"], solve-group = "default" }
+
+[tool.pixi.pypi-dependencies]
+rware = { path = ".", editable = true }
+
+[tool.pixi.feature.test.tasks]
+test = "pytest"
diff --git a/rware/pettingzoo.py b/rware/pettingzoo.py
new file mode 100644
index 0000000..6325ba7
--- /dev/null
+++ b/rware/pettingzoo.py
@@ -0,0 +1,86 @@
+from typing import Dict, Tuple, List, Optional
+import warnings
+
+import gymnasium as gym
+import numpy as np
+from pettingzoo import ParallelEnv
+
+from .warehouse import Warehouse
+
+# IDs are stringified integers, which represent the agent.id (agent idx+1) in env.agents.
+# Set to str for compatibility with TorchRL.
+AgentID = str
+# TODO: Refactor Action object to include the message bits.
+ActionType = object
+ObsType = np.ndarray
+
+
+def to_agentid_dict(data: List):
+    return {str(i + 1): x for i, x in enumerate(data)}
+
+
+class PettingZooWrapper(ParallelEnv):
+    """Wraps a Warehouse Env object to be compatible with the PettingZoo ParallelEnv API. fast_obs not supported."""
+
+    def __init__(self, env: Warehouse):
+        super().__init__()
+        self._env = env
+        self.agents = [str(agent.id) for agent in self._env.agents]
+        self.possible_agents = self.agents
+
+    def reset(self, seed: Optional[int] = None, options: Optional[Dict] = None):
+        self._env.reset(seed, options)
+
+    def step(self, actions: dict[AgentID, ActionType]) -> Tuple[
+        dict[AgentID, ObsType],
+        dict[AgentID, float],
+        dict[AgentID, bool],
+        dict[AgentID, bool],
+        dict[AgentID, dict],
+    ]:
+        # Unwrap to list of actions
+        actions_unwrapped = [(int(id_) - 1, action) for id_, action in actions.items()]
+        actions_unwrapped.sort(key=lambda x: x[0])
+        actions_unwrapped = [x[1] for x in actions]
+        assert (
+            len(actions_unwrapped) == self._env.n_agents
+        ), f"Incorrect number of actions provided. Expected {self._env.n_agents} but got {len(actions_unwrapped)}"
+
+        # Step inner environment
+        obs, rewards, terminated, truncated, info = self._env.step(actions_unwrapped)
+
+        # Transform to PettingZoo output
+        obs = to_agentid_dict(obs)
+        rewards = to_agentid_dict(rewards)
+        terminated = to_agentid_dict(terminated)
+        truncated = to_agentid_dict(truncated)
+        if len(info) != 0:
+            warnings.warn(
+                "Error: expected info dict to be empty. PettingZooWrapper is likely out of date."
+            )
+        info = {str(i + 1): {} for i in range(self._env.n_agents)}
+
+        return obs, rewards, terminated, truncated, info
+
+    def render(self):
+        return self._env.render()
+
+    def close(self) -> None:
+        self._env.close()
+
+    def state(self):
+        return self._env.get_global_image()
+
+    def observation_space(self, agent: AgentID) -> gym.spaces.Space:
+        space = self._env.observation_space
+        if self._env.fast_obs:
+            raise NotImplementedError(
+                "PettingZooWrapper not yet supported for Warehouse fast_obs."
+            )
+        assert isinstance(space, gym.spaces.Tuple)
+        return space[int(agent) - 1]
+
+    def action_space(self, agent: AgentID) -> gym.spaces.Space:
+        space = self._env.observation_space
+        assert isinstance(space, gym.spaces.Tuple)
+        return space[int(agent) - 1]
diff --git a/setup.py b/setup.py
index 3458de6..be91643 100644
--- a/setup.py
+++ b/setup.py
@@ -27,6 +27,6 @@
         "pyglet<2",
         "networkx",
     ],
-    extras_require={"test": ["pytest"]},
+    extras_require={"test": ["pytest"], "pettingzoo": ["pettingzoo"]},
     include_package_data=True,
 )

From da55e4a6b3536ec705d10b9c8b78a01ee515358a Mon Sep 17 00:00:00 2001
From: MarkHaoxiang <mark.haoxiang@gmail.com>
Date: Sat, 21 Dec 2024 22:25:41 +0000
Subject: [PATCH 2/6] PettingZoo bug fixes and test

---
 rware/pettingzoo.py       | 18 +++++++++++----
 tests/test_integration.py | 48 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+), 5 deletions(-)
 create mode 100644 tests/test_integration.py

diff --git a/rware/pettingzoo.py b/rware/pettingzoo.py
index 6325ba7..808d69a 100644
--- a/rware/pettingzoo.py
+++ b/rware/pettingzoo.py
@@ -29,7 +29,13 @@ def __init__(self, env: Warehouse):
         self.possible_agents = self.agents
 
     def reset(self, seed: Optional[int] = None, options: Optional[Dict] = None):
-        self._env.reset(seed, options)
+        obs, info = self._env.reset(seed, options)
+        obs = to_agentid_dict(obs)
+        info = {str(i + 1): {} for i in range(self._env.n_agents)}
+        # Reset agents
+        self.agents = [str(agent.id) for agent in self._env.agents]
+        self.possible_agents = self.agents
+        return obs, info
 
     def step(self, actions: dict[AgentID, ActionType]) -> Tuple[
         dict[AgentID, ObsType],
@@ -41,7 +47,7 @@ def step(self, actions: dict[AgentID, ActionType]) -> Tuple[
         # Unwrap to list of actions
         actions_unwrapped = [(int(id_) - 1, action) for id_, action in actions.items()]
         actions_unwrapped.sort(key=lambda x: x[0])
-        actions_unwrapped = [x[1] for x in actions]
+        actions_unwrapped = [x[1] for x in actions_unwrapped]
         assert (
             len(actions_unwrapped) == self._env.n_agents
         ), f"Incorrect number of actions provided. Expected {self._env.n_agents} but got {len(actions_unwrapped)}"
@@ -52,8 +58,10 @@ def step(self, actions: dict[AgentID, ActionType]) -> Tuple[
         # Transform to PettingZoo output
         obs = to_agentid_dict(obs)
         rewards = to_agentid_dict(rewards)
-        terminated = to_agentid_dict(terminated)
-        truncated = to_agentid_dict(truncated)
+        if terminated or truncated:
+            self.agents = []  # PettingZoo requires agents to be removed
+        terminated = to_agentid_dict([terminated for _ in range(self._env.n_agents)])
+        truncated = to_agentid_dict([truncated for _ in range(self._env.n_agents)])
         if len(info) != 0:
             warnings.warn(
                 "Error: expected info dict to be empty. PettingZooWrapper is likely out of date."
@@ -81,6 +89,6 @@ def observation_space(self, agent: AgentID) -> gym.spaces.Space:
         return space[int(agent) - 1]
 
     def action_space(self, agent: AgentID) -> gym.spaces.Space:
-        space = self._env.observation_space
+        space = self._env.action_space
         assert isinstance(space, gym.spaces.Tuple)
         return space[int(agent) - 1]
diff --git a/tests/test_integration.py b/tests/test_integration.py
new file mode 100644
index 0000000..c1ffb33
--- /dev/null
+++ b/tests/test_integration.py
@@ -0,0 +1,48 @@
+from typing import Optional
+import importlib
+import pytest
+
+from rware.warehouse import Warehouse, RewardType, ObservationType
+
+_has_pettingzoo = importlib.util.find_spec("pettingzoo") is not None
+if _has_pettingzoo:
+    from pettingzoo.test import parallel_api_test
+    from rware.pettingzoo import PettingZooWrapper
+
+
+@pytest.mark.parametrize("n_agents", [1, 3])
+@pytest.mark.parametrize("msg_bits", [0, 1])
+@pytest.mark.parametrize("sensor_range", [1, 3])
+@pytest.mark.parametrize("max_inactivity_steps", [None, 10, 1000])
+@pytest.mark.parametrize("reward_type", [RewardType.GLOBAL, RewardType.INDIVIDUAL])
+@pytest.mark.parametrize(
+    "observation_type",
+    [ObservationType.DICT, ObservationType.IMAGE, ObservationType.IMAGE_DICT],
+)
+def test_pettingzoo_wrapper(
+    n_agents: int,
+    msg_bits: int,
+    sensor_range: int,
+    max_inactivity_steps: Optional[int],
+    reward_type: RewardType,
+    observation_type: ObservationType,
+):
+    if not _has_pettingzoo:
+        pytest.skip("PettingZoo not available.")
+        return
+
+    env = Warehouse(
+        shelf_columns=1,
+        column_height=5,
+        shelf_rows=3,
+        n_agents=n_agents,
+        msg_bits=msg_bits,
+        sensor_range=sensor_range,
+        request_queue_size=5,
+        max_inactivity_steps=max_inactivity_steps,
+        max_steps=None,
+        reward_type=reward_type,
+        observation_type=observation_type,
+    )
+    env = PettingZooWrapper(env)
+    parallel_api_test(env)

From 727824c5bfe7142123fe5489e88fb89c9b1819b7 Mon Sep 17 00:00:00 2001
From: MarkHaoxiang <mark.haoxiang@gmail.com>
Date: Sat, 21 Dec 2024 22:42:22 +0000
Subject: [PATCH 3/6] Pixi set as package

---
 pyproject.toml | 21 ++++++++++++++-------
 setup.py       |  2 +-
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index e5b109c..13dbfeb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,10 +1,6 @@
-[build-system]
-requires = ["setuptools"]
-build-backend = "setuptools.build_meta"
-
 [project]
 name = "rware"
-version = "2.0.0"
+version = "2.0.1"
 description = "Multi-Robot Warehouse environment for reinforcement learning"
 readme = { content-type = "text/markdown", file = "README.md" }
 maintainers = [{ name = "Filippos Christianos" }]
@@ -16,6 +12,10 @@ requires-python = ">=3.7"
 urls = { github = "https://github.com/semitable/robotic-warehouse" }
 dependencies = ["numpy", "gymnasium", "pyglet<2", "networkx"]
 
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
 [project.optional-dependencies]
 test = ["pytest"]
 pettingzoo = ["pettingzoo"]
@@ -24,12 +24,11 @@ pettingzoo = ["pettingzoo"]
 exclude = ["contrib", "docs", "tests"]
 
 # pixi
-[tool.pixi.project]
+[tool.pixi.workspace]
 channels = ["conda-forge"]
 platforms = ["linux-64"]
 preview = ["pixi-build"]
 
-# Environments
 [tool.pixi.environments]
 default = { solve-group = "default" }
 test = { features = ["test", "pettingzoo"], solve-group = "default" }
@@ -37,5 +36,13 @@ test = { features = ["test", "pettingzoo"], solve-group = "default" }
 [tool.pixi.pypi-dependencies]
 rware = { path = ".", editable = true }
 
+[tool.pixi.package]
+name = "rware"
+version = "2.0.1"
+
+[tool.pixi.build-system]
+build-backend = { name = "pixi-build-python", version = "*" }
+channels = ["pixi-build-backends", "conda-forge"]
+
 [tool.pixi.feature.test.tasks]
 test = "pytest"
diff --git a/setup.py b/setup.py
index be91643..e42d987 100644
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@
 
 setup(
     name="rware",
-    version="2.0.0",
+    version="2.0.1",
     description="Multi-Robot Warehouse environment for reinforcement learning",
     long_description=README,
     long_description_content_type="text/markdown",

From 96ec215c9e70aca788e99a3945e10b2d77c2826b Mon Sep 17 00:00:00 2001
From: MarkHaoxiang <mark.haoxiang@gmail.com>
Date: Sat, 21 Dec 2024 23:08:53 +0000
Subject: [PATCH 4/6] action_spaces and observation_spaces dict

---
 rware/pettingzoo.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/rware/pettingzoo.py b/rware/pettingzoo.py
index 808d69a..4ffa54b 100644
--- a/rware/pettingzoo.py
+++ b/rware/pettingzoo.py
@@ -25,16 +25,24 @@ class PettingZooWrapper(ParallelEnv):
     def __init__(self, env: Warehouse):
         super().__init__()
         self._env = env
-        self.agents = [str(agent.id) for agent in self._env.agents]
-        self.possible_agents = self.agents
+        self.agents = self.possible_agents = []
+        self.observation_spaces = self.action_spaces = {}
 
     def reset(self, seed: Optional[int] = None, options: Optional[Dict] = None):
         obs, info = self._env.reset(seed, options)
         obs = to_agentid_dict(obs)
         info = {str(i + 1): {} for i in range(self._env.n_agents)}
-        # Reset agents
+        # Reset agents and spaces
         self.agents = [str(agent.id) for agent in self._env.agents]
         self.possible_agents = self.agents
+        self.observation_spaces = {
+            agent_id: self.observation_space(agent_id)
+            for agent_id in [str(i + 1) for i in range(self._env.n_agents)]
+        }
+        self.action_spaces = {
+            agent_id: self.action_space(agent_id)
+            for agent_id in [str(i + 1) for i in range(self._env.n_agents)]
+        }
         return obs, info
 
     def step(self, actions: dict[AgentID, ActionType]) -> Tuple[
@@ -83,7 +91,7 @@ def observation_space(self, agent: AgentID) -> gym.spaces.Space:
         space = self._env.observation_space
         if self._env.fast_obs:
             raise NotImplementedError(
-                "PettingZooWrapper not yet supported for Warehouse fast_obs."
+                "PettingZooWrapper not yet supported for ObservationType.FLATTENED."
             )
         assert isinstance(space, gym.spaces.Tuple)
         return space[int(agent) - 1]

From 9d631e5ad8227c1e65db5ae5e70fa170a6fc8a81 Mon Sep 17 00:00:00 2001
From: MarkHaoxiang <mark.haoxiang@gmail.com>
Date: Sun, 22 Dec 2024 07:38:45 +0000
Subject: [PATCH 5/6] Support FlattenedObs (after checking issue with failing
 test_env tests)

---
 rware/pettingzoo.py       | 6 +-----
 tests/test_integration.py | 9 +++++++--
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/rware/pettingzoo.py b/rware/pettingzoo.py
index 4ffa54b..45be8d5 100644
--- a/rware/pettingzoo.py
+++ b/rware/pettingzoo.py
@@ -20,7 +20,7 @@ def to_agentid_dict(data: List):
 
 
 class PettingZooWrapper(ParallelEnv):
-    """Wraps a Warehouse Env object to be compatible with the PettingZoo ParallelEnv API. fast_obs not supported."""
+    """Wraps a Warehouse Env object to be compatible with the PettingZoo ParallelEnv API."""
 
     def __init__(self, env: Warehouse):
         super().__init__()
@@ -89,10 +89,6 @@ def state(self):
 
     def observation_space(self, agent: AgentID) -> gym.spaces.Space:
         space = self._env.observation_space
-        if self._env.fast_obs:
-            raise NotImplementedError(
-                "PettingZooWrapper not yet supported for ObservationType.FLATTENED."
-            )
         assert isinstance(space, gym.spaces.Tuple)
         return space[int(agent) - 1]
 
diff --git a/tests/test_integration.py b/tests/test_integration.py
index c1ffb33..53517e8 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -13,11 +13,16 @@
 @pytest.mark.parametrize("n_agents", [1, 3])
 @pytest.mark.parametrize("msg_bits", [0, 1])
 @pytest.mark.parametrize("sensor_range", [1, 3])
-@pytest.mark.parametrize("max_inactivity_steps", [None, 10, 1000])
+@pytest.mark.parametrize("max_inactivity_steps", [None, 10])
 @pytest.mark.parametrize("reward_type", [RewardType.GLOBAL, RewardType.INDIVIDUAL])
 @pytest.mark.parametrize(
     "observation_type",
-    [ObservationType.DICT, ObservationType.IMAGE, ObservationType.IMAGE_DICT],
+    [
+        ObservationType.DICT,
+        ObservationType.IMAGE,
+        ObservationType.IMAGE_DICT,
+        ObservationType.FLATTENED,
+    ],
 )
 def test_pettingzoo_wrapper(
     n_agents: int,

From c83e50ea469a4a7a6881c3a2f3a01da4dc083209 Mon Sep 17 00:00:00 2001
From: MarkHaoxiang <mark.haoxiang@gmail.com>
Date: Sun, 22 Dec 2024 07:52:06 +0000
Subject: [PATCH 6/6] Bump version back down

---
 pyproject.toml | 2 +-
 setup.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 13dbfeb..7ecf543 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "rware"
-version = "2.0.1"
+version = "2.0.0"
 description = "Multi-Robot Warehouse environment for reinforcement learning"
 readme = { content-type = "text/markdown", file = "README.md" }
 maintainers = [{ name = "Filippos Christianos" }]
diff --git a/setup.py b/setup.py
index e42d987..be91643 100644
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@
 
 setup(
     name="rware",
-    version="2.0.1",
+    version="2.0.0",
     description="Multi-Robot Warehouse environment for reinforcement learning",
     long_description=README,
     long_description_content_type="text/markdown",