Major update: added working UI and interactive play, supports human vs cpu, human vs human, cpu vs cpu
elliottower · Jan 30, 2023 · 50e73d7 · 50e73d7
1 parent e6c234a
commit 50e73d7
Show file tree

Hide file tree

Showing 23 changed files with 481 additions and 155 deletions.
diff --git a/README.md b/README.md
@@ -31,9 +31,9 @@ In a Python shell, run the following:
 
 ```python
 import pettingzoo
-from gobblet import gobblet_v0
+from gobblet import gobblet_v1
 
-env = gobblet_v0.env()
+env = gobblet_v1.env()
 ```
 
 ### Train a DQL agent with Tianshou

diff --git a/environment.yml b/environment.yml
@@ -11,9 +11,9 @@ dependencies:
   - blinker=1.5=pyhd8ed1ab_0
   - brotlipy=0.7.0=py38hef030d1_1005
   - c-ares=1.18.1=h0d85af4_0
-  - ca-certificates=2022.12.7=h033912b_0
+  - ca-certificates=2023.01.10=hecd8cb5_0
   - cachetools=5.3.0=pyhd8ed1ab_0
-  - certifi=2022.12.7=pyhd8ed1ab_0
+  - certifi=2022.12.7=py38hecd8cb5_0
   - cffi=1.15.1=py38hb368cf1_3
   - charset-normalizer=2.1.1=pyhd8ed1ab_0
   - click=8.1.3=unix_pyhd8ed1ab_2
@@ -24,13 +24,13 @@ dependencies:
   - google-auth=2.16.0=pyh1a96a4e_1
   - google-auth-oauthlib=0.4.6=pyhd8ed1ab_0
   - grpcio=1.42.0=py38ha29bfda_0
-  - gym=0.26.1=py38h5a36765_0
   - gym-notices=0.0.8=pyhd8ed1ab_0
   - h5py=2.10.0=nompi_py38h106b333_102
   - hdf5=1.10.5=nompi_h500d6d3_1114
   - idna=3.4=pyhd8ed1ab_0
   - importlib-metadata=6.0.0=pyha770c72_0
   - importlib_metadata=6.0.0=hd8ed1ab_0
+  - iniconfig=1.1.1=pyhd3eb1b0_0
   - libblas=3.9.0=16_osx64_openblas
   - libcblas=3.9.0=16_osx64_openblas
   - libcxx=14.0.6=h9765a3e_0
@@ -52,16 +52,18 @@ dependencies:
   - numba=0.56.4=py38hab356c4_0
   - numpy=1.23.5=py38hc2f29e8_0
   - oauthlib=3.2.2=pyhd8ed1ab_0
-  - openssl=1.1.1s=hfd90126_1
+  - openssl=1.1.1s=hca72f7f_0
   - packaging=23.0=pyhd8ed1ab_0
   - pip=22.3.1=py38hecd8cb5_0
-  - protobuf=3.20.1=py38he9d5cce_0
+  - pluggy=1.0.0=py38hecd8cb5_1
+  - py=1.11.0=pyhd3eb1b0_0
   - pyasn1=0.4.8=py_0
   - pyasn1-modules=0.2.7=py_0
   - pycparser=2.21=pyhd8ed1ab_0
   - pyjwt=2.6.0=pyhd8ed1ab_0
   - pyopenssl=23.0.0=pyhd8ed1ab_0
   - pysocks=1.7.1=pyha2e5f31_6
+  - pytest=7.1.2=py38hecd8cb5_0
   - python=3.8.16=h218abb5_2
   - python_abi=3.8=2_cp38
   - pytorch=1.12.1=cpu_py38h03065b0_0
@@ -80,6 +82,7 @@ dependencies:
   - tensorboard-plugin-wit=1.8.1=pyhd8ed1ab_0
   - tianshou=0.4.11=pyhd8ed1ab_0
   - tk=8.6.12=h5d9f67b_0
+  - tomli=2.0.1=py38hecd8cb5_0
   - tqdm=4.64.1=pyhd8ed1ab_0
   - typing-extensions=4.4.0=hd8ed1ab_0
   - typing_extensions=4.4.0=pyha770c72_0
@@ -91,13 +94,97 @@ dependencies:
   - zipp=3.11.0=pyhd8ed1ab_0
   - zlib=1.2.13=h4dc903c_0
   - pip:
-    - chess==1.7.0
-    - gymnasium==0.27.1
-    - gymnasium-notices==0.0.1
-    - hanabi-learning-environment==0.0.4
-    - jax-jumpy==0.2.0
-    - pettingzoo==1.22.3
-    - pygame==2.1.3.dev8
-    - rlcard==1.0.5
-    - termcolor==2.2.0
+      - aiohttp-cors==0.7.0
+      - astunparse==1.6.3
+      - bleach==6.0.0
+      - blessed==1.19.1
+      - build==0.10.0
+      - chess==1.7.0
+      - colorful==0.5.5
+      - contourpy==1.0.7
+      - cycler==0.11.0
+      - decorator==5.1.1
+      - distlib==0.3.6
+      - dm-tree==0.1.8
+      - docopt==0.6.2
+      - docutils==0.19
+      - filelock==3.9.0
+      - flatbuffers==23.1.21
+      - fonttools==4.38.0
+      - gast==0.4.0
+      - gobblet-rl==1.0.2
+      - google-api-core==2.11.0
+      - google-pasta==0.2.0
+      - googleapis-common-protos==1.58.0
+      - gpustat==1.0.0
+      - gym==0.23.1
+      - gymnasium==0.27.1
+      - gymnasium-notices==0.0.1
+      - hanabi-learning-environment==0.0.4
+      - imageio==2.25.0
+      - importlib-resources==5.10.2
+      - jaraco-classes==3.2.3
+      - jax-jumpy==0.2.0
+      - jsonschema==4.17.3
+      - keras==2.11.0
+      - keyring==23.13.1
+      - kiwisolver==1.4.4
+      - libclang==15.0.6.1
+      - lz4==4.3.2
+      - markdown-it-py==2.1.0
+      - matplotlib==3.6.3
+      - mdurl==0.1.2
+      - more-itertools==9.0.0
+      - msgpack==1.0.4
+      - networkx==3.0
+      - nvidia-ml-py==11.495.46
+      - opencensus==0.11.1
+      - opencensus-context==0.1.3
+      - opencv-python==4.7.0.68
+      - opt-einsum==3.3.0
+      - pandas==1.5.3
+      - pettingzoo==1.22.3
+      - pillow==9.4.0
+      - pipreqs==0.4.11
+      - pkginfo==1.9.6
+      - pkgutil-resolve-name==1.3.10
+      - platformdirs==2.6.2
+      - prometheus-client==0.16.0
+      - protobuf==3.19.6
+      - psutil==5.9.4
+      - py-spy==0.3.14
+      - pydantic==1.10.4
+      - pygame==2.1.3.dev8
+      - pygments==2.14.0
+      - pyparsing==3.0.9
+      - pyproject-hooks==1.0.0
+      - pyrsistent==0.19.3
+      - python-dateutil==2.8.2
+      - pytz==2022.7.1
+      - pywavelets==1.4.1
+      - pyyaml==6.0
+      - ray==3.0.0.dev0
+      - readme-renderer==37.3
+      - requests-toolbelt==0.10.1
+      - rfc3986==2.0.0
+      - rich==13.3.0
+      - rlcard==1.0.5
+      - scikit-image==0.19.3
+      - scipy==1.10.0
+      - smart-open==6.3.0
+      - tabulate==0.9.0
+      - tensorboardx==2.5.1
+      - tensorflow==2.11.0
+      - tensorflow-estimator==2.11.0
+      - tensorflow-io-gcs-filesystem==0.30.0
+      - tensorflow-probability==0.19.0
+      - termcolor==2.2.0
+      - tifffile==2023.1.23.1
+      - twine==4.0.2
+      - typer==0.7.0
+      - virtualenv==20.17.1
+      - wcwidth==0.2.6
+      - webencodings==0.5.1
+      - wrapt==1.14.1
+      - yarg==0.1.9
 prefix: /Users/elliottower/anaconda3/envs/gobblet-rl
diff --git a/gobblet/examples/__pycache__/utils.cpython-38.pyc b/gobblet/examples/__pycache__/utils.cpython-38.pyc
diff --git a/gobblet/examples/example_DQN_tianshou.py b/gobblet/examples/example_DQN_tianshou.py
@@ -29,7 +29,7 @@
 from tianshou.utils.net.common import Net
 from torch.utils.tensorboard import SummaryWriter
 
-from gobblet import gobblet_v0
+from gobblet import gobblet_v1
 
 
 def get_parser() -> argparse.ArgumentParser:
@@ -156,7 +156,7 @@ def get_agents(
 
 
 def get_env(render_mode=None, debug=False):
-    return PettingZooEnv(gobblet_v0.env(render_mode=render_mode, debug=debug))
+    return PettingZooEnv(gobblet_v1.env(render_mode=render_mode, debug=debug))
 
 
 def train_agent(

diff --git a/gobblet/examples/example_RLlib.py b/gobblet/examples/example_RLlib.py
@@ -11,7 +11,7 @@
 from ray.tune.logger import pretty_print
 from ray.tune.registry import register_env
 
-from gobblet import gobblet_v0
+from gobblet import gobblet_v1
 from gobblet.models.action_mask_model import TorchActionMaskModel
 from gobblet.utils import get_project_root
 
@@ -23,7 +23,7 @@ def prepare_train() -> Tuple[ppo.PPOTrainer, PettingZooEnv]:
 
     # get the Pettingzoo env
     def env_creator():
-        env = env = gobblet_v0.env(render_mode=None, debug=False)
+        env = env = gobblet_v1.env(render_mode=None, debug=False)
         return env
 
     register_env(env_name, lambda config: PettingZooEnv(env_creator()))

diff --git a/gobblet/examples/example_basic.py b/gobblet/examples/example_basic.py
@@ -1,14 +1,13 @@
-from gobblet import gobblet_v0
+from gobblet import gobblet_v1
 import argparse
 import numpy as np
 import time
-import pygame
-import sys
+
 
 def get_parser() -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        "--render-mode", type=str, default="human", help="options: human, console, console_full"
+        "--render-mode", type=str, default="human", help="options: human, text, text_full"
     )
     parser.add_argument(
         "--agent-type", type=str, default="random_admissible", help="options: random, random_admissible"
@@ -19,12 +18,6 @@ def get_parser() -> argparse.ArgumentParser:
     parser.add_argument(
         "--debug", action="store_true", help="display extra debugging information"
     )
-    parser.add_argument(
-        "--no-cpu", action="store_true", help="disable CPU players and play as both teams"
-    )
-    parser.add_argument(
-        "--player", type=int, default=0, help="which player to play as"
-    )
 
     return parser
 
@@ -36,7 +29,7 @@ def get_args() -> argparse.Namespace:
     # train the agent and watch its performance in a match!
     args = get_args()
 
-    env = gobblet_v0.env(render_mode=args.render_mode, debug=args.debug)
+    env = gobblet_v1.env(render_mode=args.render_mode, debug=args.debug)
     if args.seed is not None:
         env.reset(seed=args.seed)
         np.random.seed(args.seed)
@@ -57,40 +50,5 @@ def get_args() -> argparse.Namespace:
             if args.agent_type == "random_admissible":
                 action_mask = observation['action_mask']
                 action = np.random.choice(np.arange(len(action_mask)), p=action_mask / np.sum(action_mask))
-            if agent == env.agents[args.player] or args.no_cpu:
-                while True:
-                    event = pygame.event.wait()
-                    if event.type == pygame.QUIT:
-                        pygame.quit()
-                        pygame.display.quit()
-                        sys.exit()
-                    mousex, mousey = pygame.mouse.get_pos()
-                    if  50 <= mousex < 220:
-                        action = 0
-                    elif 220 <= mousex < 390:
-                        action = 1
-                    elif 390 <= mousex < 560:
-                        action = 2
-                    elif 560 <= mousex < 730:
-                        action = 3
-                    elif 730 <= mousex < 900:
-                        action = 4
-                    elif 900 <= mousex < 1070:
-                        action = 5
-                    elif 1070 <= mousex < 1240:
-                        action = 8
-                    piece_size = 1 # hard code to get previews of large pieces
-                    env.unwrapped.board.squares_preview[:] = 0
-                    env.unwrapped.board.squares_preview[action * piece_size] = 1
-                    env.render()
-                    pygame.display.update()
-                    print(env.unwrapped.board.squares_preview)
-                    print(f"pos: {mousex}, {mousey}")
-                    if event.type == pygame.MOUSEBUTTONDOWN:
-                        env.unwrapped.board.squares_preview[action * piece_size] = 0
-                        break
-
-
             time.sleep(.1)
-            env.step(action)
-
+            env.step(action)