add snake environment

OpenRL-Lab · Aug 18, 2023 · f06c9b1 · f06c9b1
2 parents 6300cc6 + 401569b
commit f06c9b1
Show file tree

Hide file tree

Showing 36 changed files with 2,931 additions and 82 deletions.
diff --git a/Gallery.md b/Gallery.md
@@ -54,18 +54,19 @@ Users are also welcome to contribute their own training examples and demos to th
 
 <div align="center">
 
-|                                                                                                   Environment/Demo                                                                                                    |                                                        Tags                                                         |              Refs               |
-|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------:|:-------------------------------:|
-|                                                  [MuJoCo](https://github.com/deepmind/mujoco)<br>  <img width="300px" height="auto" src="./docs/images/mujoco.png">                                                   |                            ![continuous](https://img.shields.io/badge/-continous-green)                             |   [code](./examples/mujoco/)    |
-|                               [CartPole](https://gymnasium.farama.org/environments/classic_control/cart_pole/)<br>  <img width="300px" height="auto" src="./docs/images/cartpole.png">                                |                           ![discrete](https://img.shields.io/badge/-discrete-brightgreen)                           |  [code](./examples/cartpole/)   |
-|                       [MPE: Simple Spread](https://pettingzoo.farama.org/environments/mpe/simple_spread/)<br>  <img width="300px" height="auto" src="./docs/images/simple_spread_trained.gif">                        | ![discrete](https://img.shields.io/badge/-discrete-brightgreen)  ![MARL](https://img.shields.io/badge/-MARL-yellow) |     [code](./examples/mpe/)     |
-|                                                  [StarCraft II](https://github.com/oxwhirl/smac)<br>  <img width="300px" height="auto" src="./docs/images/smac.png">                                                  | ![discrete](https://img.shields.io/badge/-discrete-brightgreen)  ![MARL](https://img.shields.io/badge/-MARL-yellow) |    [code](./examples/smac/)     |
-|                                [Chat Bot](https://openrl-docs.readthedocs.io/en/latest/quick_start/train_nlp.html)<br>  <img width="300px" height="auto" src="./docs/images/chat.gif">                                |                          ![discrete](https://img.shields.io/badge/-discrete-brightgreen)        ![NLP](https://img.shields.io/badge/-NLP-green)     ![Transformer](https://img.shields.io/badge/-Transformer-blue)                               |     [code](./examples/nlp/)     |
-|                                        [Atari Pong](https://gymnasium.farama.org/environments/atari/pong/)<br>  <img width="300px" height="auto" src="./docs/images/pong.png">                                        |                          ![discrete](https://img.shields.io/badge/-discrete-brightgreen)        ![image](https://img.shields.io/badge/-image-red)                                    |    [code](./examples/atari/)    |
-|                                   [PettingZoo: Tic-Tac-Toe](https://pettingzoo.farama.org/environments/classic/tictactoe/)<br>  <img width="300px" height="auto" src="./docs/images/tic-tac-toe.jpeg">                                    |                      ![selfplay](https://img.shields.io/badge/-selfplay-blue)    ![discrete](https://img.shields.io/badge/-discrete-brightgreen)                               |  [code](./examples/selfplay/)   |
-|                                   [DeepMind Control](https://shimmy.farama.org/environments/dm_control/)<br>  <img width="300px" height="auto" src="https://shimmy.farama.org/_images/dm_locomotion.png">                                    |           ![continuous](https://img.shields.io/badge/-continous-green)                                         | [code](./examples/dm_control/)  |
-|                                   [Omniverse Isaac Gym](https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs)<br>  <img width="300px" height="auto" src="https://user-images.githubusercontent.com/34286328/171454189-6afafbff-bb61-4aac-b518-24646007cb9f.gif">                                    |                       ![discrete](https://img.shields.io/badge/-discrete-brightgreen)                               |    [code](./examples/isaac/)    |
-|                                                      [GridWorld](./examples/gridworld/)<br>  <img width="300px" height="auto" src="./docs/images/gridworld.jpg">                                                      |                          ![discrete](https://img.shields.io/badge/-discrete-brightgreen)                               |  [code](./examples/gridworld/)  |
-| [Super Mario Bros](https://github.com/Kautenja/gym-super-mario-bros)<br>  <img width="300px" height="auto" src="https://user-images.githubusercontent.com/2184469/40948820-3d15e5c2-6830-11e8-81d4-ecfaffee0a14.png"> |                           ![discrete](https://img.shields.io/badge/-discrete-brightgreen)     ![image](https://img.shields.io/badge/-image-red)                      | [code](./examples/super_mario/) |
-|                                                 [Gym Retro](https://github.com/openai/retro)<br>  <img width="300px" height="auto" src="./docs/images/gym-retro.jpg">                                                 |                           ![discrete](https://img.shields.io/badge/-discrete-brightgreen)     ![image](https://img.shields.io/badge/-image-red)                      |    [code](./examples/retro/)    |
+|                                                                                                        Environment/Demo                                                                                                        |                                                                                           Tags                                                                                            |              Refs               |
+|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-------------------------------:|
+|                                                       [MuJoCo](https://github.com/deepmind/mujoco)<br>  <img width="300px" height="auto" src="./docs/images/mujoco.png">                                                       |                                                               ![continuous](https://img.shields.io/badge/-continous-green)                                                                |   [code](./examples/mujoco/)    |
+|                                    [CartPole](https://gymnasium.farama.org/environments/classic_control/cart_pole/)<br>  <img width="300px" height="auto" src="./docs/images/cartpole.png">                                    |                                                              ![discrete](https://img.shields.io/badge/-discrete-brightgreen)                                                              |  [code](./examples/cartpole/)   |
+|                            [MPE: Simple Spread](https://pettingzoo.farama.org/environments/mpe/simple_spread/)<br>  <img width="300px" height="auto" src="./docs/images/simple_spread_trained.gif">                            |                                    ![discrete](https://img.shields.io/badge/-discrete-brightgreen)  ![MARL](https://img.shields.io/badge/-MARL-yellow)                                    |     [code](./examples/mpe/)     |
+|                                                      [StarCraft II](https://github.com/oxwhirl/smac)<br>  <img width="300px" height="auto" src="./docs/images/smac.png">                                                       |                                    ![discrete](https://img.shields.io/badge/-discrete-brightgreen)  ![MARL](https://img.shields.io/badge/-MARL-yellow)                                    |    [code](./examples/smac/)     |
+|                                    [Chat Bot](https://openrl-docs.readthedocs.io/en/latest/quick_start/train_nlp.html)<br>  <img width="300px" height="auto" src="./docs/images/chat.gif">                                     | ![discrete](https://img.shields.io/badge/-discrete-brightgreen)        ![NLP](https://img.shields.io/badge/-NLP-green)     ![Transformer](https://img.shields.io/badge/-Transformer-blue) |     [code](./examples/nlp/)     |
+|                                            [Atari Pong](https://gymnasium.farama.org/environments/atari/pong/)<br>  <img width="300px" height="auto" src="./docs/images/pong.png">                                             |                                 ![discrete](https://img.shields.io/badge/-discrete-brightgreen)        ![image](https://img.shields.io/badge/-image-red)                                  |    [code](./examples/atari/)    |
+|                              [PettingZoo: Tic-Tac-Toe](https://pettingzoo.farama.org/environments/classic/tictactoe/)<br>  <img width="300px" height="auto" src="./docs/images/tic-tac-toe.jpeg">                              |                                ![selfplay](https://img.shields.io/badge/-selfplay-blue)    ![discrete](https://img.shields.io/badge/-discrete-brightgreen)                                |  [code](./examples/selfplay/)   |
+|                            [DeepMind Control](https://shimmy.farama.org/environments/dm_control/)<br>  <img width="300px" height="auto" src="https://shimmy.farama.org/_images/dm_locomotion.png">                             |                                                               ![continuous](https://img.shields.io/badge/-continous-green)                                                                | [code](./examples/dm_control/)  |
+| [Omniverse Isaac Gym](https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs)<br>  <img width="300px" height="auto" src="https://user-images.githubusercontent.com/34286328/171454189-6afafbff-bb61-4aac-b518-24646007cb9f.gif"> |                                                              ![discrete](https://img.shields.io/badge/-discrete-brightgreen)                                                              |    [code](./examples/isaac/)    |
+|                                                   [Snake](http://www.jidiai.cn/env_detail?envid=1)<br>  <img width="300px" height="auto" src="./docs/images/snakes_1v1.gif">                                                   |                                 ![selfplay](https://img.shields.io/badge/-selfplay-blue)  ![discrete](https://img.shields.io/badge/-discrete-brightgreen)                                 |    [code](./examples/snake/)    |
+|                                                          [GridWorld](./examples/gridworld/)<br>  <img width="300px" height="auto" src="./docs/images/gridworld.jpg">                                                           |                                                              ![discrete](https://img.shields.io/badge/-discrete-brightgreen)                                                              |  [code](./examples/gridworld/)  |
+|     [Super Mario Bros](https://github.com/Kautenja/gym-super-mario-bros)<br>  <img width="300px" height="auto" src="https://user-images.githubusercontent.com/2184469/40948820-3d15e5c2-6830-11e8-81d4-ecfaffee0a14.png">      |                                   ![discrete](https://img.shields.io/badge/-discrete-brightgreen)     ![image](https://img.shields.io/badge/-image-red)                                   | [code](./examples/super_mario/) |
+|                                                     [Gym Retro](https://github.com/openai/retro)<br>  <img width="300px" height="auto" src="./docs/images/gym-retro.jpg">                                                      |                                   ![discrete](https://img.shields.io/badge/-discrete-brightgreen)     ![image](https://img.shields.io/badge/-image-red)                                   |    [code](./examples/retro/)    |
 </div>
diff --git a/README.md b/README.md
@@ -104,7 +104,8 @@ Environments currently supported by OpenRL (for more details, please refer to [G
 - [Atari](https://gymnasium.farama.org/environments/atari/)
 - [StarCraft II](https://github.com/oxwhirl/smac)
 - [Omniverse Isaac Gym](https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs)
--    [DeepMind Control](https://shimmy.farama.org/environments/dm_control/)
+- [DeepMind Control](https://shimmy.farama.org/environments/dm_control/)
+- [Snake](http://www.jidiai.cn/env_detail?envid=1)
 - [GridWorld](./examples/gridworld/)
 - [Super Mario Bros](https://github.com/Kautenja/gym-super-mario-bros)
 - [Gym Retro](https://github.com/openai/retro)

diff --git a/README_zh.md b/README_zh.md
@@ -86,7 +86,8 @@ OpenRL目前支持的环境（更多详情请参考 [Gallery](Gallery.md)）：
 - [Atari](https://gymnasium.farama.org/environments/atari/)
 - [StarCraft II](https://github.com/oxwhirl/smac)
 - [Omniverse Isaac Gym](https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs)
--    [DeepMind Control](https://shimmy.farama.org/environments/dm_control/)
+- [DeepMind Control](https://shimmy.farama.org/environments/dm_control/)
+- [Snake](http://www.jidiai.cn/env_detail?envid=1)
 - [GridWorld](./examples/gridworld/)
 - [Super Mario Bros](https://github.com/Kautenja/gym-super-mario-bros)
 - [Gym Retro](https://github.com/openai/retro)

diff --git a/docs/images/snakes_1v1.gif b/docs/images/snakes_1v1.gif
diff --git a/examples/dm_control/train_ppo.py b/examples/dm_control/train_ppo.py
@@ -4,10 +4,9 @@
 from openrl.configs.config import create_config_parser
 from openrl.envs.common import make
 from openrl.envs.wrappers.base_wrapper import BaseWrapper
-from openrl.envs.wrappers.extra_wrappers import GIFWrapper
+from openrl.envs.wrappers.extra_wrappers import FrameSkip, GIFWrapper
 from openrl.modules.common import PPONet as Net
 from openrl.runners.common import PPOAgent as Agent
-from openrl.envs.wrappers.extra_wrappers import FrameSkip
 
 env_name = "dm_control/cartpole-balance-v0"
 # env_name = "dm_control/walker-walk-v0"

diff --git a/examples/smac/README.md b/examples/smac/README.md
@@ -11,4 +11,7 @@ Installation guide for Linux:
 
 Train SMAC with [MAPPO](https://arxiv.org/abs/2103.01955) algorithm:
 
-`python train_ppo.py --config smac_ppo.yaml`
+`python train_ppo.py --config smac_ppo.yaml`
+
+## Render replay on Mac
+
diff --git a/examples/snake/README.md b/examples/snake/README.md
@@ -0,0 +1,17 @@
+
+This is the example for the snake game.
+
+## Usage
+
+```bash
+python train_selfplay.py
+```
+
+
+## Submit to JiDi
+
+Submission site: http://www.jidiai.cn/env_detail?envid=1.
+
+Snake scenarios: [here](https://github.com/jidiai/ai_lib/blob/7a6986f0cb543994277103dbf605e9575d59edd6/env/config.json#L94)
+Original Snake environment: [here](https://github.com/jidiai/ai_lib/blob/master/env/snakes.py)
+
diff --git a/examples/snake/selfplay.yaml b/examples/snake/selfplay.yaml
@@ -0,0 +1,3 @@
+seed: 0
+callbacks:
+  - id: "ProgressBarCallback"
diff --git a/examples/snake/submissions/random_agent/submission.py b/examples/snake/submissions/random_agent/submission.py
@@ -0,0 +1,29 @@
+# -*- coding:utf-8  -*-
+def sample_single_dim(action_space_list_each, is_act_continuous):  # Sample one random action from a single action space; discrete samples are returned one-hot encoded.
+    if is_act_continuous:
+        each = action_space_list_each.sample()  # continuous case: the space's raw sample is returned unchanged
+    else:
+        if action_space_list_each.__class__.__name__ == "Discrete":  # dispatch on the class name rather than isinstance
+            each = [0] * action_space_list_each.n
+            idx = action_space_list_each.sample()
+            each[idx] = 1  # one-hot list of length n with the sampled index set
+        elif action_space_list_each.__class__.__name__ == "MultiDiscreteParticle":
+            each = []
+            nvec = action_space_list_each.high - action_space_list_each.low + 1  # choices per dimension; presumably high/low are inclusive bounds — confirm
+            sample_indexes = action_space_list_each.sample()
+
+            for i in range(len(nvec)):
+                dim = nvec[i]
+                new_action = [0] * dim
+                index = sample_indexes[i]  # NOTE(review): raw sample used as the index, which assumes low is 0 — confirm
+                new_action[index] = 1
+                each.extend(new_action)  # concatenate the per-dimension one-hot lists into one flat list
+    return each  # NOTE(review): `each` is never assigned (UnboundLocalError) when the space is neither Discrete nor MultiDiscreteParticle
+
+
+def my_controller(observation, action_space, is_act_continuous):  # Build a random joint action with one entry per element of action_space; observation is not used.
+    joint_action = []
+    for i in range(len(action_space)):
+        player = sample_single_dim(action_space[i], is_act_continuous)  # independent random action for the i-th space
+        joint_action.append(player)
+    return joint_action
diff --git a/examples/snake/test_env.py b/examples/snake/test_env.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright 2023 The OpenRL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+""""""
+import time
+
+import numpy as np
+from wrappers import ConvertObs
+
+from openrl.envs.snake.snake import SnakeEatBeans
+from openrl.envs.snake.snake_pettingzoo import SnakeEatBeansAECEnv
+from openrl.selfplay.wrappers.random_opponent_wrapper import RandomOpponentWrapper
+
+
+def test_raw_env():  # Drive SnakeEatBeans with random one-hot actions for two players until any done flag is set, printing every transition.
+    env = SnakeEatBeans()
+
+    obs, info = env.reset()
+
+    done = False
+    while not np.any(done):  # np.any handles both the initial scalar False and whatever step() returns as done
+        a1 = np.zeros(4)  # NOTE(review): length 4 is hard-coded; presumably matches env.action_space — confirm
+        a1[env.action_space.sample()] = 1  # one-hot encode the sampled action index
+        a2 = np.zeros(4)
+        a2[env.action_space.sample()] = 1
+        obs, reward, done, info = env.step([a1, a2])  # joint action: one one-hot vector per player
+        print("obs:", obs)
+        print("reward:", reward)
+        print("done:", done)
+        print("info:", info)
+
+
+def test_aec_env():  # Step SnakeEatBeansAECEnv with random actions and save the collected rendered frames to test.gif.
+    from PIL import Image  # imported locally, so Pillow is only needed when this test runs
+
+    img_list = []
+    env = SnakeEatBeansAECEnv(render_mode="rgb_array")
+    env.reset(seed=0)
+    # time.sleep(1)
+    img = env.render()
+    img_list.append(img)  # first frame: rendered right after reset
+    step = 0
+    for player_name in env.agent_iter():
+        if step > 20:  # cap the rollout at 21 agent turns (step counts from 0)
+            break
+        observation, reward, termination, truncation, info = env.last()
+        if termination or truncation:  # stop as soon as the current agent is finished
+            break
+        action = env.action_space(player_name).sample()
+        # if player_name == "player_0":
+        #     action = 2
+        # elif player_name == "player_1":
+        #     action = 3
+        # else:
+        #     raise ValueError("Unknown player name: {}".format(player_name))
+        env.step(action)
+        img = env.render()
+        if player_name == "player_0":  # keep only the frame rendered after player_0's turn
+            img_list.append(img)
+        # time.sleep(1)
+
+        step += 1
+    print("Total steps: {}".format(step))
+
+    save_path = "test.gif"
+    img_list = [Image.fromarray(img) for img in img_list]  # convert the rendered arrays to PIL images
+    img_list[0].save(save_path, save_all=True, append_images=img_list[1:], duration=500)  # write all frames as one animated GIF; duration is per-frame display time in ms (per Pillow docs)
+
+
+def test_vec_env():  # Create "snakes_1v1" environments via make() and step them with random actions until any done flag is set.
+    from openrl.envs.common import make
+
+    env = make(
+        "snakes_1v1",
+        opponent_wrappers=[
+            RandomOpponentWrapper,
+        ],
+        env_wrappers=[ConvertObs],
+        render_mode="group_human",
+        env_num=2,
+    )
+    obs, info = env.reset()
+    step = 0
+    done = False
+    while not np.any(done):  # stops on the first step where any done flag is truthy
+        action = env.random_action()
+        obs, reward, done, info = env.step(action)
+        time.sleep(0.3)  # pause between steps; presumably so the "group_human" rendering is watchable — confirm
+        step += 1
+    print("Total steps: {}".format(step))
+
+
+if __name__ == "__main__":  # running this file as a script exercises only test_vec_env; the other two tests are not called here
+    test_vec_env()