Skip to content

Commit

Permalink
add snake environment
Browse files Browse the repository at this point in the history
add snake environment
  • Loading branch information
huangshiyu13 authored Aug 18, 2023
2 parents 6300cc6 + 401569b commit f06c9b1
Show file tree
Hide file tree
Showing 36 changed files with 2,931 additions and 82 deletions.
29 changes: 15 additions & 14 deletions Gallery.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,19 @@ Users are also welcome to contribute their own training examples and demos to th

<div align="center">

| Environment/Demo | Tags | Refs |
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------:|:-------------------------------:|
| [MuJoCo](https://github.com/deepmind/mujoco)<br> <img width="300px" height="auto" src="./docs/images/mujoco.png"> | ![continuous](https://img.shields.io/badge/-continous-green) | [code](./examples/mujoco/) |
| [CartPole](https://gymnasium.farama.org/environments/classic_control/cart_pole/)<br> <img width="300px" height="auto" src="./docs/images/cartpole.png"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | [code](./examples/cartpole/) |
| [MPE: Simple Spread](https://pettingzoo.farama.org/environments/mpe/simple_spread/)<br> <img width="300px" height="auto" src="./docs/images/simple_spread_trained.gif"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) ![MARL](https://img.shields.io/badge/-MARL-yellow) | [code](./examples/mpe/) |
| [StarCraft II](https://github.com/oxwhirl/smac)<br> <img width="300px" height="auto" src="./docs/images/smac.png"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) ![MARL](https://img.shields.io/badge/-MARL-yellow) | [code](./examples/smac/) |
| [Chat Bot](https://openrl-docs.readthedocs.io/en/latest/quick_start/train_nlp.html)<br> <img width="300px" height="auto" src="./docs/images/chat.gif"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) ![NLP](https://img.shields.io/badge/-NLP-green) ![Transformer](https://img.shields.io/badge/-Transformer-blue) | [code](./examples/nlp/) |
| [Atari Pong](https://gymnasium.farama.org/environments/atari/pong/)<br> <img width="300px" height="auto" src="./docs/images/pong.png"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) ![image](https://img.shields.io/badge/-image-red) | [code](./examples/atari/) |
| [PettingZoo: Tic-Tac-Toe](https://pettingzoo.farama.org/environments/classic/tictactoe/)<br> <img width="300px" height="auto" src="./docs/images/tic-tac-toe.jpeg"> | ![selfplay](https://img.shields.io/badge/-selfplay-blue) ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | [code](./examples/selfplay/) |
| [DeepMind Control](https://shimmy.farama.org/environments/dm_control/)<br> <img width="300px" height="auto" src="https://shimmy.farama.org/_images/dm_locomotion.png"> | ![continuous](https://img.shields.io/badge/-continous-green) | [code](./examples/dm_control/) |
| [Omniverse Isaac Gym](https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs)<br> <img width="300px" height="auto" src="https://user-images.githubusercontent.com/34286328/171454189-6afafbff-bb61-4aac-b518-24646007cb9f.gif"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | [code](./examples/isaac/) |
| [GridWorld](./examples/gridworld/)<br> <img width="300px" height="auto" src="./docs/images/gridworld.jpg"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | [code](./examples/gridworld/) |
| [Super Mario Bros](https://github.com/Kautenja/gym-super-mario-bros)<br> <img width="300px" height="auto" src="https://user-images.githubusercontent.com/2184469/40948820-3d15e5c2-6830-11e8-81d4-ecfaffee0a14.png"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) ![image](https://img.shields.io/badge/-image-red) | [code](./examples/super_mario/) |
| [Gym Retro](https://github.com/openai/retro)<br> <img width="300px" height="auto" src="./docs/images/gym-retro.jpg"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) ![image](https://img.shields.io/badge/-image-red) | [code](./examples/retro/) |
| Environment/Demo | Tags | Refs |
|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-------------------------------:|
| [MuJoCo](https://github.com/deepmind/mujoco)<br> <img width="300px" height="auto" src="./docs/images/mujoco.png"> | ![continuous](https://img.shields.io/badge/-continuous-green) | [code](./examples/mujoco/) |
| [CartPole](https://gymnasium.farama.org/environments/classic_control/cart_pole/)<br> <img width="300px" height="auto" src="./docs/images/cartpole.png"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | [code](./examples/cartpole/) |
| [MPE: Simple Spread](https://pettingzoo.farama.org/environments/mpe/simple_spread/)<br> <img width="300px" height="auto" src="./docs/images/simple_spread_trained.gif"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) ![MARL](https://img.shields.io/badge/-MARL-yellow) | [code](./examples/mpe/) |
| [StarCraft II](https://github.com/oxwhirl/smac)<br> <img width="300px" height="auto" src="./docs/images/smac.png"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) ![MARL](https://img.shields.io/badge/-MARL-yellow) | [code](./examples/smac/) |
| [Chat Bot](https://openrl-docs.readthedocs.io/en/latest/quick_start/train_nlp.html)<br> <img width="300px" height="auto" src="./docs/images/chat.gif"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) ![NLP](https://img.shields.io/badge/-NLP-green) ![Transformer](https://img.shields.io/badge/-Transformer-blue) | [code](./examples/nlp/) |
| [Atari Pong](https://gymnasium.farama.org/environments/atari/pong/)<br> <img width="300px" height="auto" src="./docs/images/pong.png"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) ![image](https://img.shields.io/badge/-image-red) | [code](./examples/atari/) |
| [PettingZoo: Tic-Tac-Toe](https://pettingzoo.farama.org/environments/classic/tictactoe/)<br> <img width="300px" height="auto" src="./docs/images/tic-tac-toe.jpeg"> | ![selfplay](https://img.shields.io/badge/-selfplay-blue) ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | [code](./examples/selfplay/) |
| [DeepMind Control](https://shimmy.farama.org/environments/dm_control/)<br> <img width="300px" height="auto" src="https://shimmy.farama.org/_images/dm_locomotion.png"> | ![continuous](https://img.shields.io/badge/-continuous-green) | [code](./examples/dm_control/) |
| [Omniverse Isaac Gym](https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs)<br> <img width="300px" height="auto" src="https://user-images.githubusercontent.com/34286328/171454189-6afafbff-bb61-4aac-b518-24646007cb9f.gif"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | [code](./examples/isaac/) |
| [Snake](http://www.jidiai.cn/env_detail?envid=1)<br> <img width="300px" height="auto" src="./docs/images/snakes_1v1.gif"> | ![selfplay](https://img.shields.io/badge/-selfplay-blue) ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | [code](./examples/snake/) |
| [GridWorld](./examples/gridworld/)<br> <img width="300px" height="auto" src="./docs/images/gridworld.jpg"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) | [code](./examples/gridworld/) |
| [Super Mario Bros](https://github.com/Kautenja/gym-super-mario-bros)<br> <img width="300px" height="auto" src="https://user-images.githubusercontent.com/2184469/40948820-3d15e5c2-6830-11e8-81d4-ecfaffee0a14.png"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) ![image](https://img.shields.io/badge/-image-red) | [code](./examples/super_mario/) |
| [Gym Retro](https://github.com/openai/retro)<br> <img width="300px" height="auto" src="./docs/images/gym-retro.jpg"> | ![discrete](https://img.shields.io/badge/-discrete-brightgreen) ![image](https://img.shields.io/badge/-image-red) | [code](./examples/retro/) |
</div>
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ Environments currently supported by OpenRL (for more details, please refer to [G
- [Atari](https://gymnasium.farama.org/environments/atari/)
- [StarCraft II](https://github.com/oxwhirl/smac)
- [Omniverse Isaac Gym](https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs)
- [DeepMind Control](https://shimmy.farama.org/environments/dm_control/)
- [DeepMind Control](https://shimmy.farama.org/environments/dm_control/)
- [Snake](http://www.jidiai.cn/env_detail?envid=1)
- [GridWorld](./examples/gridworld/)
- [Super Mario Bros](https://github.com/Kautenja/gym-super-mario-bros)
- [Gym Retro](https://github.com/openai/retro)
Expand Down
3 changes: 2 additions & 1 deletion README_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ OpenRL目前支持的环境(更多详情请参考 [Gallery](Gallery.md)):
- [Atari](https://gymnasium.farama.org/environments/atari/)
- [StarCraft II](https://github.com/oxwhirl/smac)
- [Omniverse Isaac Gym](https://github.com/NVIDIA-Omniverse/OmniIsaacGymEnvs)
- [DeepMind Control](https://shimmy.farama.org/environments/dm_control/)
- [DeepMind Control](https://shimmy.farama.org/environments/dm_control/)
- [Snake](http://www.jidiai.cn/env_detail?envid=1)
- [GridWorld](./examples/gridworld/)
- [Super Mario Bros](https://github.com/Kautenja/gym-super-mario-bros)
- [Gym Retro](https://github.com/openai/retro)
Expand Down
Binary file added docs/images/snakes_1v1.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 1 addition & 2 deletions examples/dm_control/train_ppo.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@
from openrl.configs.config import create_config_parser
from openrl.envs.common import make
from openrl.envs.wrappers.base_wrapper import BaseWrapper
from openrl.envs.wrappers.extra_wrappers import GIFWrapper
from openrl.envs.wrappers.extra_wrappers import FrameSkip, GIFWrapper
from openrl.modules.common import PPONet as Net
from openrl.runners.common import PPOAgent as Agent
from openrl.envs.wrappers.extra_wrappers import FrameSkip

env_name = "dm_control/cartpole-balance-v0"
# env_name = "dm_control/walker-walk-v0"
Expand Down
5 changes: 4 additions & 1 deletion examples/smac/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,7 @@ Installation guide for Linux:

Train SMAC with [MAPPO](https://arxiv.org/abs/2103.01955) algorithm:

`python train_ppo.py --config smac_ppo.yaml`
`python train_ppo.py --config smac_ppo.yaml`

## Render replay on Mac

17 changes: 17 additions & 0 deletions examples/snake/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@

This is the example for the snake game.

## Usage

```bash
python train_selfplay.py
```


## Submit to JiDi

Submission site: http://www.jidiai.cn/env_detail?envid=1

Snake scenarios: [here](https://github.com/jidiai/ai_lib/blob/7a6986f0cb543994277103dbf605e9575d59edd6/env/config.json#L94)
Original Snake environment: [here](https://github.com/jidiai/ai_lib/blob/master/env/snakes.py)

3 changes: 3 additions & 0 deletions examples/snake/selfplay.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
seed: 0
callbacks:
- id: "ProgressBarCallback"
29 changes: 29 additions & 0 deletions examples/snake/submissions/random_agent/submission.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# -*- coding:utf-8 -*-
def sample_single_dim(action_space_list_each, is_act_continuous):
    """Draw one random action from a single action-space entry.

    Args:
        action_space_list_each: Space object exposing ``sample()``. In the
            non-continuous case it is dispatched on its class *name*:
            ``"Discrete"`` (reads ``.n``) or ``"MultiDiscreteParticle"``
            (reads ``.low`` and ``.high``).
        is_act_continuous: When truthy, the raw ``sample()`` result is
            returned unchanged.

    Returns:
        The raw sample for continuous spaces. Otherwise a one-hot list of
        length ``n`` (``Discrete``), or the concatenation of one one-hot
        list per dimension (``MultiDiscreteParticle``).

    Raises:
        NotImplementedError: If the space is non-continuous and its class
            name is neither ``Discrete`` nor ``MultiDiscreteParticle``.
    """
    if is_act_continuous:
        return action_space_list_each.sample()

    # Dispatch on the class name rather than isinstance so no space library
    # has to be imported by this submission file.
    space_type = action_space_list_each.__class__.__name__

    if space_type == "Discrete":
        each = [0] * action_space_list_each.n
        each[action_space_list_each.sample()] = 1
        return each

    if space_type == "MultiDiscreteParticle":
        # Number of choices per dimension (bounds are inclusive, hence +1).
        nvec = action_space_list_each.high - action_space_list_each.low + 1
        sample_indexes = action_space_list_each.sample()

        each = []
        for i, dim in enumerate(nvec):
            # int() keeps list repetition well-defined if ``dim`` is a
            # NumPy integer scalar (presumably the case; verify with caller).
            one_hot = [0] * int(dim)
            one_hot[sample_indexes[i]] = 1
            each.extend(one_hot)
        return each

    # Previously this path fell through to ``return each`` with ``each``
    # unbound, surfacing as an opaque UnboundLocalError.
    raise NotImplementedError(
        "Unsupported discrete action space type: {}".format(space_type)
    )


def my_controller(observation, action_space, is_act_continuous):
    """Return a random joint action with one entry per action space.

    Args:
        observation: Not used by this random policy (presumably kept to
            match the submission entry-point signature; verify with caller).
        action_space: Iterable of action-space objects, one per entry of
            the joint action.
        is_act_continuous: Forwarded to ``sample_single_dim`` for every
            space.

    Returns:
        A list containing the sampled action for each element of
        ``action_space``, in order.
    """
    return [
        sample_single_dim(space, is_act_continuous) for space in action_space
    ]
107 changes: 107 additions & 0 deletions examples/snake/test_env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2023 The OpenRL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

""""""
import time

import numpy as np
from wrappers import ConvertObs

from openrl.envs.snake.snake import SnakeEatBeans
from openrl.envs.snake.snake_pettingzoo import SnakeEatBeansAECEnv
from openrl.selfplay.wrappers.random_opponent_wrapper import RandomOpponentWrapper


def test_raw_env():
    """Drive the raw ``SnakeEatBeans`` env with random one-hot actions.

    Two length-4 one-hot actions are sampled per step and passed to
    ``env.step`` until any entry of ``done`` is truthy; every transition is
    printed.
    """
    env = SnakeEatBeans()
    obs, info = env.reset()

    done = False
    while True:
        if np.any(done):
            break

        # Build one one-hot action per snake, sampling in the same order.
        joint_action = []
        for _ in range(2):
            one_hot = np.zeros(4)
            one_hot[env.action_space.sample()] = 1
            joint_action.append(one_hot)

        obs, reward, done, info = env.step(joint_action)
        for label, value in (
            ("obs:", obs),
            ("reward:", reward),
            ("done:", done),
            ("info:", info),
        ):
            print(label, value)


def test_aec_env():
    """Roll out the AEC snake env with random actions and save a GIF.

    Renders one frame after reset and one after each ``player_0`` move,
    stops once ``step`` exceeds 20 or the current agent is terminated or
    truncated, then writes the collected frames to ``test.gif``.
    """
    from PIL import Image

    env = SnakeEatBeansAECEnv(render_mode="rgb_array")
    env.reset(seed=0)
    frames = [env.render()]

    step = 0
    for player_name in env.agent_iter():
        if step > 20:
            break

        observation, reward, termination, truncation, info = env.last()
        if termination or truncation:
            break

        env.step(env.action_space(player_name).sample())
        frame = env.render()
        # Keep only the frames rendered right after player_0 has acted.
        if player_name == "player_0":
            frames.append(frame)

        step += 1
    print("Total steps: {}".format(step))

    images = [Image.fromarray(frame) for frame in frames]
    first_image = images[0]
    remaining_images = images[1:]
    first_image.save(
        "test.gif",
        save_all=True,
        append_images=remaining_images,
        duration=500,
    )


def test_vec_env():
    """Run the vectorised ``snakes_1v1`` env against a random opponent.

    Creates two environments through ``make``, steps them with random
    actions until any ``done`` flag is truthy, and prints the step count.
    """
    from openrl.envs.common import make

    make_kwargs = dict(
        opponent_wrappers=[RandomOpponentWrapper],
        env_wrappers=[ConvertObs],
        render_mode="group_human",
        env_num=2,
    )
    env = make("snakes_1v1", **make_kwargs)

    obs, info = env.reset()
    done = False
    step = 0
    while True:
        if np.any(done):
            break
        obs, reward, done, info = env.step(env.random_action())
        # Pause between steps so the rendered window is watchable.
        time.sleep(0.3)
        step += 1
    print("Total steps: {}".format(step))


# Script entry point: only the vectorised-environment check is run.
if __name__ == "__main__":
    test_vec_env()
Loading

0 comments on commit f06c9b1

Please sign in to comment.