diff --git a/README.md b/README.md index 52d35ba..22677ff 100644 --- a/README.md +++ b/README.md @@ -31,9 +31,9 @@ In a Python shell, run the following: ```python import pettingzoo -from gobblet import gobblet_v0 +from gobblet import gobblet_v1 -env = gobblet_v0.env() +env = gobblet_v1.env() ``` ### Train a DQL agent with Tianshou diff --git a/environment.yml b/environment.yml index c4fbf92..acedcb8 100644 --- a/environment.yml +++ b/environment.yml @@ -11,9 +11,9 @@ dependencies: - blinker=1.5=pyhd8ed1ab_0 - brotlipy=0.7.0=py38hef030d1_1005 - c-ares=1.18.1=h0d85af4_0 - - ca-certificates=2022.12.7=h033912b_0 + - ca-certificates=2023.01.10=hecd8cb5_0 - cachetools=5.3.0=pyhd8ed1ab_0 - - certifi=2022.12.7=pyhd8ed1ab_0 + - certifi=2022.12.7=py38hecd8cb5_0 - cffi=1.15.1=py38hb368cf1_3 - charset-normalizer=2.1.1=pyhd8ed1ab_0 - click=8.1.3=unix_pyhd8ed1ab_2 @@ -24,13 +24,13 @@ dependencies: - google-auth=2.16.0=pyh1a96a4e_1 - google-auth-oauthlib=0.4.6=pyhd8ed1ab_0 - grpcio=1.42.0=py38ha29bfda_0 - - gym=0.26.1=py38h5a36765_0 - gym-notices=0.0.8=pyhd8ed1ab_0 - h5py=2.10.0=nompi_py38h106b333_102 - hdf5=1.10.5=nompi_h500d6d3_1114 - idna=3.4=pyhd8ed1ab_0 - importlib-metadata=6.0.0=pyha770c72_0 - importlib_metadata=6.0.0=hd8ed1ab_0 + - iniconfig=1.1.1=pyhd3eb1b0_0 - libblas=3.9.0=16_osx64_openblas - libcblas=3.9.0=16_osx64_openblas - libcxx=14.0.6=h9765a3e_0 @@ -52,16 +52,18 @@ dependencies: - numba=0.56.4=py38hab356c4_0 - numpy=1.23.5=py38hc2f29e8_0 - oauthlib=3.2.2=pyhd8ed1ab_0 - - openssl=1.1.1s=hfd90126_1 + - openssl=1.1.1s=hca72f7f_0 - packaging=23.0=pyhd8ed1ab_0 - pip=22.3.1=py38hecd8cb5_0 - - protobuf=3.20.1=py38he9d5cce_0 + - pluggy=1.0.0=py38hecd8cb5_1 + - py=1.11.0=pyhd3eb1b0_0 - pyasn1=0.4.8=py_0 - pyasn1-modules=0.2.7=py_0 - pycparser=2.21=pyhd8ed1ab_0 - pyjwt=2.6.0=pyhd8ed1ab_0 - pyopenssl=23.0.0=pyhd8ed1ab_0 - pysocks=1.7.1=pyha2e5f31_6 + - pytest=7.1.2=py38hecd8cb5_0 - python=3.8.16=h218abb5_2 - python_abi=3.8=2_cp38 - pytorch=1.12.1=cpu_py38h03065b0_0 @@ -80,6 +82,7 @@ dependencies: - tensorboard-plugin-wit=1.8.1=pyhd8ed1ab_0 - tianshou=0.4.11=pyhd8ed1ab_0 - tk=8.6.12=h5d9f67b_0 + - tomli=2.0.1=py38hecd8cb5_0 - tqdm=4.64.1=pyhd8ed1ab_0 - typing-extensions=4.4.0=hd8ed1ab_0 - typing_extensions=4.4.0=pyha770c72_0 @@ -91,13 +94,97 @@ dependencies: - zipp=3.11.0=pyhd8ed1ab_0 - zlib=1.2.13=h4dc903c_0 - pip: - - chess==1.7.0 - - gymnasium==0.27.1 - - gymnasium-notices==0.0.1 - - hanabi-learning-environment==0.0.4 - - jax-jumpy==0.2.0 - - pettingzoo==1.22.3 - - pygame==2.1.3.dev8 - - rlcard==1.0.5 - - termcolor==2.2.0 + - aiohttp-cors==0.7.0 + - astunparse==1.6.3 + - bleach==6.0.0 + - blessed==1.19.1 + - build==0.10.0 + - chess==1.7.0 + - colorful==0.5.5 + - contourpy==1.0.7 + - cycler==0.11.0 + - decorator==5.1.1 + - distlib==0.3.6 + - dm-tree==0.1.8 + - docopt==0.6.2 + - docutils==0.19 + - filelock==3.9.0 + - flatbuffers==23.1.21 + - fonttools==4.38.0 + - gast==0.4.0 + - gobblet-rl==1.0.2 + - google-api-core==2.11.0 + - google-pasta==0.2.0 + - googleapis-common-protos==1.58.0 + - gpustat==1.0.0 + - gym==0.23.1 + - gymnasium==0.27.1 + - gymnasium-notices==0.0.1 + - hanabi-learning-environment==0.0.4 + - imageio==2.25.0 + - importlib-resources==5.10.2 + - jaraco-classes==3.2.3 + - jax-jumpy==0.2.0 + - jsonschema==4.17.3 + - keras==2.11.0 + - keyring==23.13.1 + - kiwisolver==1.4.4 + - libclang==15.0.6.1 + - lz4==4.3.2 + - markdown-it-py==2.1.0 + - matplotlib==3.6.3 + - mdurl==0.1.2 + - more-itertools==9.0.0 + - msgpack==1.0.4 + - networkx==3.0 + - nvidia-ml-py==11.495.46 + - opencensus==0.11.1 + - opencensus-context==0.1.3 + - opencv-python==4.7.0.68 + - opt-einsum==3.3.0 + - pandas==1.5.3 + - pettingzoo==1.22.3 + - pillow==9.4.0 + - pipreqs==0.4.11 + - pkginfo==1.9.6 + - pkgutil-resolve-name==1.3.10 + - platformdirs==2.6.2 + - prometheus-client==0.16.0 + - protobuf==3.19.6 + - psutil==5.9.4 + - py-spy==0.3.14 + - pydantic==1.10.4 + - pygame==2.1.3.dev8 + - pygments==2.14.0 + - pyparsing==3.0.9 + - pyproject-hooks==1.0.0 + - pyrsistent==0.19.3 + - python-dateutil==2.8.2 + - pytz==2022.7.1 + - pywavelets==1.4.1 + - pyyaml==6.0 + - ray==3.0.0.dev0 + - readme-renderer==37.3 + - requests-toolbelt==0.10.1 + - rfc3986==2.0.0 + - rich==13.3.0 + - rlcard==1.0.5 + - scikit-image==0.19.3 + - scipy==1.10.0 + - smart-open==6.3.0 + - tabulate==0.9.0 + - tensorboardx==2.5.1 + - tensorflow==2.11.0 + - tensorflow-estimator==2.11.0 + - tensorflow-io-gcs-filesystem==0.30.0 + - tensorflow-probability==0.19.0 + - termcolor==2.2.0 + - tifffile==2023.1.23.1 + - twine==4.0.2 + - typer==0.7.0 + - virtualenv==20.17.1 + - wcwidth==0.2.6 + - webencodings==0.5.1 + - wrapt==1.14.1 + - yarg==0.1.9 prefix: /Users/elliottower/anaconda3/envs/gobblet-rl diff --git a/gobblet/examples/__pycache__/utils.cpython-38.pyc b/gobblet/examples/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000..8bf42d2 Binary files /dev/null and b/gobblet/examples/__pycache__/utils.cpython-38.pyc differ diff --git a/gobblet/examples/example_DQN_tianshou.py b/gobblet/examples/example_DQN_tianshou.py index efbe3bf..13a6b81 100644 --- a/gobblet/examples/example_DQN_tianshou.py +++ b/gobblet/examples/example_DQN_tianshou.py @@ -29,7 +29,7 @@ from tianshou.utils.net.common import Net from torch.utils.tensorboard import SummaryWriter -from gobblet import gobblet_v0 +from gobblet import gobblet_v1 def get_parser() -> argparse.ArgumentParser: @@ -156,7 +156,7 @@ def get_agents( def get_env(render_mode=None, debug=False): - return PettingZooEnv(gobblet_v0.env(render_mode=render_mode, debug=debug)) + return PettingZooEnv(gobblet_v1.env(render_mode=render_mode, debug=debug)) def train_agent( diff --git a/gobblet/examples/example_RLlib.py b/gobblet/examples/example_RLlib.py index b251fc1..d97041f 100644 --- a/gobblet/examples/example_RLlib.py +++ b/gobblet/examples/example_RLlib.py @@ -11,7 +11,7 @@ from ray.tune.logger import pretty_print from ray.tune.registry import register_env -from gobblet import gobblet_v0 +from gobblet import gobblet_v1 from gobblet.models.action_mask_model import TorchActionMaskModel from gobblet.utils import get_project_root @@ -23,7 +23,7 @@ def prepare_train() -> Tuple[ppo.PPOTrainer, PettingZooEnv]: # get the Pettingzoo env def env_creator(): - env = env = gobblet_v0.env(render_mode=None, debug=False) + env = env = gobblet_v1.env(render_mode=None, debug=False) return env register_env(env_name, lambda config: PettingZooEnv(env_creator())) diff --git a/gobblet/examples/example_basic.py b/gobblet/examples/example_basic.py index a7c1412..634912a 100644 --- a/gobblet/examples/example_basic.py +++ b/gobblet/examples/example_basic.py @@ -1,14 +1,13 @@ -from gobblet import gobblet_v0 +from gobblet import gobblet_v1 import argparse import numpy as np import time -import pygame -import sys + def get_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser() parser.add_argument( - "--render-mode", type=str, default="human", help="options: human, console, console_full" + "--render-mode", type=str, default="human", help="options: human, text, text_full" ) parser.add_argument( "--agent-type", type=str, default="random_admissible", help="options: random, random_admissible" @@ -19,12 +18,6 @@ def get_parser() -> argparse.ArgumentParser: parser.add_argument( "--debug", action="store_true", help="display extra debugging information" ) - parser.add_argument( - "--no-cpu", action="store_true", help="disable CPU players and play as both teams" - ) - parser.add_argument( - "--player", type=int, default=0, help="which player to play as" - ) return parser @@ -36,7 +29,7 @@ def get_args() -> argparse.Namespace: # train the agent and watch its performance in a match! args = get_args() - env = gobblet_v0.env(render_mode=args.render_mode, debug=args.debug) + env = gobblet_v1.env(render_mode=args.render_mode, debug=args.debug) if args.seed is not None: env.reset(seed=args.seed) np.random.seed(args.seed) @@ -57,40 +50,5 @@ def get_args() -> argparse.Namespace: if args.agent_type == "random_admissible": action_mask = observation['action_mask'] action = np.random.choice(np.arange(len(action_mask)), p=action_mask / np.sum(action_mask)) - if agent == env.agents[args.player] or args.no_cpu: - while True: - event = pygame.event.wait() - if event.type == pygame.QUIT: - pygame.quit() - pygame.display.quit() - sys.exit() - mousex, mousey = pygame.mouse.get_pos() - if 50 <= mousex < 220: - action = 0 - elif 220 <= mousex < 390: - action = 1 - elif 390 <= mousex < 560: - action = 2 - elif 560 <= mousex < 730: - action = 3 - elif 730 <= mousex < 900: - action = 4 - elif 900 <= mousex < 1070: - action = 5 - elif 1070 <= mousex < 1240: - action = 8 - piece_size = 1 # hard code to get previews of large pieces - env.unwrapped.board.squares_preview[:] = 0 - env.unwrapped.board.squares_preview[action * piece_size] = 1 - env.render() - pygame.display.update() - print(env.unwrapped.board.squares_preview) - print(f"pos: {mousex}, {mousey}") - if event.type == pygame.MOUSEBUTTONDOWN: - env.unwrapped.board.squares_preview[action * piece_size] = 0 - break - - time.sleep(.1) - env.step(action) - + env.step(action) \ No newline at end of file diff --git a/gobblet/examples/example_interactive_pygame.py b/gobblet/examples/example_interactive_pygame.py new file mode 100644 index 0000000..552e57b --- /dev/null +++ b/gobblet/examples/example_interactive_pygame.py @@ -0,0 +1,204 @@ +from gobblet import gobblet_v1 +import argparse +import numpy as np +import time +import pygame +import sys + +def get_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser() + parser.add_argument( + "--render-mode", type=str, default="human", help="options: human, text, text_full" + ) + parser.add_argument( + "--agent-type", type=str, default="random_admissible", help="options: random, random_admissible" + ) + parser.add_argument( + "--seed", type=int, default=None, help="random seed for board and policy" + ) + parser.add_argument( + "--debug", action="store_true", help="display extra debugging information" + ) + parser.add_argument( + "--no-cpu", action="store_true", help="disable CPU players and play as both teams" + ) + parser.add_argument( + "--cpu-only", action="store_true", help="enable CPU only games (no human input)" + ) + parser.add_argument( + "--player", type=int, default=0, help="which player to play as" + ) + parser.add_argument( + "--save_video", action="store_true", help="Save screen recording of gameplay" + ) + + return parser + +def get_args() -> argparse.Namespace: + parser = get_parser() + return parser.parse_known_args()[0] + +if __name__ == "__main__": + # train the agent and watch its performance in a match! + args = get_args() + + env = gobblet_v1.env(render_mode=args.render_mode, debug=args.debug) + if args.seed is not None: + env.reset(seed=args.seed) + np.random.seed(args.seed) + else: + env.reset() + turn = 0 + env.render() # need to render the environment before pygame can take user input + for agent in env.agent_iter(): + observation, reward, termination, truncation, info = env.last() + if termination: + print(f"Termination ({agent}), Reward: {reward}, info: {info}") + env.step(None) + elif truncation: + print("Truncated") + else: + if args.agent_type == "random": + time.sleep(.1) + action = env.action_space(agent).sample() + if args.agent_type == "random_admissible": + time.sleep(.1) + action_mask = observation['action_mask'] + action = np.random.choice(np.arange(len(action_mask)), p=action_mask / np.sum(action_mask)) + if (agent == env.agents[args.player] or args.no_cpu) and not args.cpu_only: + picked_up = False + picked_up_pos = -1 + piece_cycle = 0 + piece_size_selected = 0 + while True: + event = pygame.event.wait() + if event.type == pygame.QUIT: + pygame.quit() + pygame.display.quit() + sys.exit() + mousex, mousey = pygame.mouse.get_pos() + pos_x = 0 + if 0 <= mousex < 360: + pos_x = 0 + elif 360 <= mousex < 640: + pos_x = 1 + elif 640 <= mousex < 640 + 360: + pos_x = 2 + pos_y = 0 + if 0 <= mousey < 360: + pos_y = 0 + elif 360 <= mousey < 640: + pos_y= 1 + elif 640 <= mousey < 640 + 360: + pos_y = 2 + pos = pos_y + 3 * (pos_x) + + agent_multiplier = 1 if agent == env.agents[0] else -1 + + movable_pieces = [ i // 9 for i in observation["action_mask"].nonzero() ] # TODO: allow user to select placed piece to move + placed_pieces = env.unwrapped.board.squares[env.unwrapped.board.squares.nonzero()] + placed_pieces_agent = [a for a in placed_pieces if np.sign(a) == agent_multiplier] + placed_pieces_agent_abs = [abs(p) for p in placed_pieces_agent] + pieces = np.arange(1, 7) + unplaced = [p for p in pieces if p not in placed_pieces_agent_abs] + flatboard = env.unwrapped.board.get_flatboard() + + piece = piece if piece_size_selected > 0 else unplaced[-1] # Choose the largest unplaced piece by default + if event.type == pygame.KEYDOWN: + if event.key == pygame.K_SPACE: + flag = True + # if time.time() - last_keystroke > 0.5: # Only cycle every 0.5 seconds + piece_cycle += 1 + last_keystroke = time.time() + else: + if event.key == pygame.K_1: # User inputs for pieces of size 1 (indices 1 and 2) + piece_cycle = 0 + if 1 in unplaced: + piece = 1 + piece_size_selected = 1 + elif 2 in unplaced: + piece = 2 + piece_size_selected = 1 + else: piece = -1 + elif event.key == pygame.K_2: + piece_cycle = 0 + if 3 in unplaced: + piece = 3 + piece_size_selected = 2 + elif 4 in unplaced: + piece = 4 + piece_size_selected = 2 + else: piece = -1 + elif event.key == pygame.K_3: + piece_cycle = 0 + if 5 in unplaced: + piece = 5 + piece_size_selected = 3 + elif 6 in unplaced: + piece = 6 + piece_size_selected = 3 + else: piece = -1 + # Don't render a preview if both pieces of a given size have been placed + if piece != -1: + piece_size = ( piece + 1 ) // 2 + # Don't render a preview if both pieces of a given size have been placed + if piece_cycle: + if piece_cycle and not picked_up: + cycle_choices = np.unique( + [(p + 1) // 2 for p in unplaced]) # Transform [1,2,3,4,5,6] to [1,2,3) + piece_size = cycle_choices[ + (np.amax(cycle_choices) - (piece_cycle + 1)) % len(cycle_choices)] # Cycle from largest to smallest + + # If the hovered over position means placing a picked up piece in the same spot, mark it as illegal + if pos == picked_up_pos: + action_prev = -1 + + # Get the action from the position the mouse cursor is currently hovering over + action_prev = env.unwrapped.board.get_action(pos, piece_size, env.agents.index(agent)) + + # If the hovered over position means placing a picked up piece in the same spot, mark it as illegal + if pos == picked_up_pos: + action_prev = -1 + + # Clear previously previewed moves + env.unwrapped.board.squares_preview[:] = 0 + if action_prev != -1: + if not env.unwrapped.board.is_legal(action_prev): # If this action is illegal + action_prev = -1 + else: + env.unwrapped.board.squares_preview[pos + 9 * (piece_size-1)] = agent_multiplier # Preview this position + + # Update the display with the previewed move + env.render() + pygame.display.update() + + if event.type == pygame.MOUSEBUTTONDOWN: + # Pick up a piece (only able to if it has already been placed, and is not currently picked up) + if flatboard[pos] in placed_pieces_agent and not picked_up : + if abs(flatboard[pos]) >= abs(piece): + # Can only pick up a piece if there is a legal move to place it, other than where it was before + if not all(observation["action_mask"][9 * (piece - 1): 9 * piece] == 0): + picked_up = True + picked_up_pos = pos + # Remove a placed piece + picked_up_piece = int(flatboard[pos]) + piece = abs(picked_up_piece) + piece_size_selected = (piece + 1) // 2 + + index = np.where(env.unwrapped.board.squares == picked_up_piece)[0][0] + env.unwrapped.board.squares[index] = 0 + + # Set the only possible actions to be moving this piece to a new square + observation["action_mask"][pos + 9 * (piece-1)] = 0 # TODO: check if this is already zero + observation["action_mask"][:9 * (piece - 1)] = 0 # Zero out all the possible actions + observation["action_mask"][9 * (piece):] = 0 + + # If we are not picking a piece up, then try to place a piece, if it is legal to do so + else: + if action_prev != -1: + env.unwrapped.board.squares_preview[pos + 9 * (piece_size-1)] = 0 + action = pos + 9 * (piece - 1) + break + time.sleep(.1) + env.render() + env.step(action) \ No newline at end of file diff --git a/gobblet/examples/utils.py b/gobblet/examples/utils.py new file mode 100644 index 0000000..ffa96a8 --- /dev/null +++ b/gobblet/examples/utils.py @@ -0,0 +1,54 @@ +# from https://github.com/tdrmk/pygame_recorder +import cv2 +import pygame + +class ScreenRecorder: + """ + This class is used to record a PyGame surface and save it to a video file. + """ + + def __init__(self, width, height, fps, out_file='output.avi'): + """ + Initialize the recorder with parameters of the surface. + :param width: Width of the surface to capture + :param height: Height of the surface to capture + :param fps: Frames per second + :param out_file: Output file to save the recording + """ + print(f'Initializing ScreenRecorder with parameters width:{width} height:{height} fps:{fps}.') + print(f'Output of the screen recording saved to {out_file}.') + + # define the codec and create a video writer object + four_cc = cv2.VideoWriter_fourcc(*'XVID') + self.video = cv2.VideoWriter(out_file, four_cc, float(fps), (width, height)) + + def capture_frame(self, surf): + """ + Call this method every frame, pass in the pygame surface to capture. + :param surf: pygame surface to capture + :return: None + """ + """ + + Note: surface must have the dimensions specified in the constructor. + """ + # transform the pixels to the format used by open-cv + pixels = cv2.rotate(pygame.surfarray.pixels3d(surf), cv2.ROTATE_90_CLOCKWISE) + pixels = cv2.flip(pixels, 1) + pixels = cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR) + + # write the frame + self.video.write(pixels) + + def end_recording(self): + """ + Call this method to stop recording. + :return: None + """ + # stop recording + self.video.release() + +# References +# For more tutorials on cv2.VideoWriter, go to: +# - https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_gui/py_video_display/py_video_display.html#display-video +# - https://medium.com/@enriqueav/how-to-create-video-animations-using-python-and-opencv-881b18e41397 \ No newline at end of file diff --git a/gobblet/game/__pycache__/__init__.cpython-38.pyc b/gobblet/game/__pycache__/__init__.cpython-38.pyc deleted file mode 100644 index 79b376f..0000000 Binary files a/gobblet/game/__pycache__/__init__.cpython-38.pyc and /dev/null differ diff --git a/gobblet/game/__pycache__/__init__.cpython-39.pyc b/gobblet/game/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index 4055f9a..0000000 Binary files a/gobblet/game/__pycache__/__init__.cpython-39.pyc and /dev/null differ diff --git a/gobblet/game/__pycache__/board.cpython-38.pyc b/gobblet/game/__pycache__/board.cpython-38.pyc deleted file mode 100644 index 84138f1..0000000 Binary files a/gobblet/game/__pycache__/board.cpython-38.pyc and /dev/null differ diff --git a/gobblet/game/__pycache__/board.cpython-39.pyc b/gobblet/game/__pycache__/board.cpython-39.pyc deleted file mode 100644 index a9b6ad0..0000000 Binary files a/gobblet/game/__pycache__/board.cpython-39.pyc and /dev/null differ diff --git a/gobblet/game/__pycache__/gobblet.cpython-38.pyc b/gobblet/game/__pycache__/gobblet.cpython-38.pyc deleted file mode 100644 index 0bd9686..0000000 Binary files a/gobblet/game/__pycache__/gobblet.cpython-38.pyc and /dev/null differ diff --git a/gobblet/game/__pycache__/gobblet.cpython-39.pyc b/gobblet/game/__pycache__/gobblet.cpython-39.pyc deleted file mode 100644 index 290a3a0..0000000 Binary files a/gobblet/game/__pycache__/gobblet.cpython-39.pyc and /dev/null differ diff --git a/gobblet/game/__pycache__/utils.cpython-38.pyc b/gobblet/game/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000..74c33ad Binary files /dev/null and b/gobblet/game/__pycache__/utils.cpython-38.pyc differ diff --git a/gobblet/game/__pycache__/utils.cpython-39.pyc b/gobblet/game/__pycache__/utils.cpython-39.pyc deleted file mode 100644 index cc99aae..0000000 Binary files a/gobblet/game/__pycache__/utils.cpython-39.pyc and /dev/null differ diff --git a/gobblet/game/board.py b/gobblet/game/board.py index 687defc..aad1e8a 100644 --- a/gobblet/game/board.py +++ b/gobblet/game/board.py @@ -40,12 +40,25 @@ def __init__(self): def setup(self): self.calculate_winners() - def get_action(self, pos, piece): + def get_action_from_pos_piece(self, pos, piece): if pos in range(9) and piece in range(1,7): return 9 * (piece - 1) + pos else: return -1 + # Return an action if an agent can place of the specified size in the specified location + # Checks both possible pieces of that size, and returns -1 if neither can be placed (i.e., they are both covered) + def get_action(self, pos, piece_size, agent_index): + piece1 = piece_size * 2 - 1 + piece2 = piece_size * 2 + action1 = pos + 9 * (piece1 - 1) + action2 = pos + 9 * (piece2 - 1) + if self.is_legal(action1, agent_index): + return action1 + elif self.is_legal(action2, agent_index): + return action2 + else: + return -1 # To determine the position from an action, we take the number modulo 9, resulting in a number 0-8 def get_pos_from_action(self, action): @@ -67,19 +80,19 @@ def get_index_from_action(self, action): return pos + 9 * (piece_size - 1) # [0-26] - # Return true if an action is legal, false otherwise - def is_legal(self, action): + # Return true if an action is legal, false otherwise. + def is_legal(self, action, agent_index=0): pos = self.get_pos_from_action(action) # [0-8] piece = self.get_piece_from_action(action) # [1-6] piece_size = self.get_piece_size_from_action(action) # [1-3] + agent_multiplier = 1 if agent_index == 0 else -1 board = self.squares.reshape(3, 9) - # Check if this piece has been placed (if the piece number occurs anywhere on the level of that piece size) - if any(board[piece_size-1] == piece): - current_loc = np.where(board[piece_size-1] == piece)[0] # Returns array of values where piece is placed + if any(board[piece_size-1] == piece * agent_multiplier): + current_loc = np.where(board[piece_size-1] == piece * agent_multiplier)[0] # Returns array of values where piece is placed if len(current_loc) > 1: - print("--ERROR-- PIECE HAS BEEN USED TWICE") + raise Exception("Error: piece has been used twice") else: current_loc = current_loc[0] # Current location [0-27] # If this piece is currently covered, moving it is not a legal action @@ -100,14 +113,14 @@ def is_legal(self, action): return False # Update the board with an agent's move - def play_turn(self, agent, action): + def play_turn(self, agent_index, action): piece = self.get_piece_from_action(action) - if agent == 1: + if agent_index == 1: piece = piece * -1 index = self.get_index_from_action(action) # First: check if a move is legal or not - if not self.is_legal(action): + if not self.is_legal(action, agent_index): print("ILLEGAL MOVE: ", action) return # If piece has already been placed, clear previous location diff --git a/gobblet/game/gobblet.py b/gobblet/game/gobblet.py index ea7dbfb..925f484 100644 --- a/gobblet/game/gobblet.py +++ b/gobblet/game/gobblet.py @@ -9,7 +9,7 @@ This environment is part of the classic environments. Please read that page first for general information. -| Import | `from pettingzoo.classic.chess_v5` | +| Import | `from gobblet import gobblet_v1` | |--------------------|------------------------------------| | Actions | Discrete | | Parallel API | Yes | @@ -112,7 +112,7 @@ def env(render_mode=None, debug=None): env = raw_env(render_mode=render_mode, debug=debug) if render_mode == "ansi": env = wrappers.CaptureStdoutWrapper(env) - env = wrappers.TerminateIllegalWrapper(env, illegal_reward=-1) + # env = wrappers.TerminateIllegalWrapper(env, illegal_reward=-1) env = wrappers.AssertOutOfBoundsWrapper(env) env = wrappers.OrderEnforcingWrapper(env) return env @@ -121,8 +121,8 @@ def env(render_mode=None, debug=None): class raw_env(AECEnv): metadata = { - "render_modes": ["text", "text_full", "human"], - "name": "gobblet_v0", + "render_modes": ["human", "rgb_array", "text", "text_full"], + "name": "gobblet_v1", "is_parallelizable": True, "render_fps": 1, } @@ -175,7 +175,7 @@ def observe(self, agent): #TODO: test this if self.agents.index(agent) == 1: board = board * -1 # Swap the signs on the board for the two different agents - # Chess way of representing observations: specific channel for each color piece (e.g., two for each white small piece) + # Represent observations in the same way as pettingzoo.chess: specific channel for each color piece (e.g., two for each white small piece) layers = [] for i in range(1, 7): layers.append(board[(i - 1) // 2] == i) # 3x3 array with an entry of 1 for squares with each white piece (1, ..., 6) @@ -183,11 +183,6 @@ def observe(self, agent): #TODO: test this for i in range(1, 7): layers.append(board[(i - 1) // 2] == -i) # 3x3 array with an entry of 1 for squares with each black piece (-1, ..., -6) observation = np.stack(layers, axis=2).astype(np.int8) - # Tic-tac-toe way of representing observations: just show the raw board for current player and opponent player - # cur_p_board = np.greater(board, 0) - # opp_p_board = np.less(board, 0) - # observation = np.stack([cur_p_board, opp_p_board], axis=3).astype(np.int8) - # legal_moves = self._legal_moves() if agent == self.agent_selection else [] action_mask = np.zeros(54, "int8") @@ -252,7 +247,7 @@ def step(self, action): self._accumulate_rewards() self.turn += 1 self.action = action - if self.render_mode == "human" or "human_full": + if self.render_mode == "human" or "text" or "human_full": self.render() @@ -349,10 +344,10 @@ def getSymbol(input): print(top + " " + top + " " + top) print() - else: + elif self.render_mode == "human": # Adapted from PettingZoo connect_four.py - screen_width = 1287 - screen_height = 1287 + screen_width = 1000 #1287 + screen_height = 1000 #1287 if self.render_mode == "human": if self.screen is None: pygame.init() @@ -361,29 +356,33 @@ def getSymbol(input): elif self.screen is None: self.screen = pygame.Surface((screen_width, screen_height)) - # Load and scale all of the necessary images - tile_size = (screen_width * (91 / 99)) / 7 - + # Load and scale all the necessary images + # tile_size = screen_width * (11 / 53) + # tile_size = (screen_width * (45 / 53)) / 3 + tile_size = (screen_width * ((47 - 7) / 47)) / 3 + scale_large = 9 / 13 + scale_med = 6 / 13 + scale_small = 4 / 13 red = {} - red[3] = load_chip(tile_size, "GobbletLargeRed.png", 9/13) - red[2] = load_chip(tile_size, "GobbletMedRed.png", 9/13) - red[1] = load_chip(tile_size, "GobbletSmallRed.png", 9/13) + red[3] = load_chip(tile_size, "GobbletLargeRed.png", scale_large) + red[2] = load_chip(tile_size, "GobbletMedRed.png", scale_med) + red[1] = load_chip(tile_size, "GobbletSmallRed.png", scale_small) yellow = {} - yellow[3] = load_chip(tile_size, "GobbletLargeYellow.png", 9 / 13) - yellow[2] = load_chip(tile_size, "GobbletMedYellow.png", 9 / 13) - yellow[2] = load_chip(tile_size, "GobbletSmallYellow.png", 9 / 13) + yellow[3] = load_chip(tile_size, "GobbletLargeYellow.png", scale_large) + yellow[2] = load_chip(tile_size, "GobbletMedYellow.png", scale_med) + yellow[1] = load_chip(tile_size, "GobbletSmallYellow.png", scale_small) self.preview = {} self.preview["player_1"] = {} - self.preview["player_1"][3] = load_chip_preview(tile_size, "GobbletLargeRedPreview.png", 9 / 13) - self.preview["player_1"][2] = load_chip_preview(tile_size, "GobbletMedRedPreview.png", 9 / 13) - self.preview["player_1"][1] = load_chip_preview(tile_size, "GobbletSmallRedPreview.png", 9 / 13) + self.preview["player_1"][3] = load_chip_preview(tile_size, "GobbletLargeRedPreview.png", scale_large) + self.preview["player_1"][2] = load_chip_preview(tile_size, "GobbletMedRedPreview.png", scale_med) + self.preview["player_1"][1] = load_chip_preview(tile_size, "GobbletSmallRedPreview.png", scale_small) self.preview["player_2"] = {} - self.preview["player_2"][3] = load_chip_preview(tile_size, "GobbletLargeYellowPreview.png", 9 / 13) - self.preview["player_2"][2] = load_chip_preview(tile_size, "GobbletMedYellowPreview.png", 9 / 13) - self.preview["player_2"][1] = load_chip_preview(tile_size, "GobbletSmallYellowPreview.png", 9 / 13) + self.preview["player_2"][3] = load_chip_preview(tile_size, "GobbletLargeYellowPreview.png", scale_large) + self.preview["player_2"][2] = load_chip_preview(tile_size, "GobbletMedYellowPreview.png", scale_med) + self.preview["player_2"][1] = load_chip_preview(tile_size, "GobbletSmallYellowPreview.png", scale_small) preview_chips = {self.agents[0]: self.preview["player_1"], self.agents[1]: self.preview["player_1"]} @@ -394,56 +393,64 @@ def getSymbol(input): self.screen.blit(board_img, (0, 0)) - # Blit the necessary chips and their positions + offset = (screen_width * ((9+4) / 47)) + offset_side = (screen_width * (6 / 47)) - 1 # Slight adjustment to make everything align + offset_centering = offset * 1/3 + 5 + + # Blit the chips and their positions for i in range(9): for j in range(1, 4): - if self.board.squares[i + 9 * (j - 1)] == 1 * j: + if self.board.squares[i + 9 * (j - 1)] == 2 * j - 1 or \ + self.board.squares[i + 9 * (j - 1)] == 2 * j: # small pieces (1,2), medium pieces (3,4), large pieces (5,6) self.screen.blit( red[j], - ( - (i % 3) * (tile_size) + (tile_size * (6 / 13)), - int(i / 3) * (tile_size) + (tile_size * (6 / 13)), - ), + red[j].get_rect(center = + (int(i / 3) * (offset) + offset_side + offset_centering, + (i % 3) * (offset) + offset_side + offset_centering + ) + ) ) - if self.board.squares[i + 9 * (j - 1)] == -1 * j: + if self.board.squares[i + 9 * (j - 1)] == -1 * (2 * j - 1) or\ + self.board.squares[i + 9 * (j - 1)] == -1 * (2 * j): self.screen.blit( - red[j], - ( - (i % 3) * (tile_size) + (tile_size * (6 / 13)), - int(i / 3) * (tile_size) + (tile_size * (6 / 13)), - ), + yellow[j], + yellow[j].get_rect(center = + (int(i / 3) * (offset) + offset_side + offset_centering, + (i % 3) * (offset) + offset_side + offset_centering + ) + ) ) # Blit the preview chips and their positions for i in range(9): for j in range(1, 4): - if self.board.squares_preview[i + 9 * (j - 1)] == 1 * j: + if self.board.squares_preview[i + 9 * (j - 1)] == 1: self.screen.blit( - red[j], - ( - (i % 3) * (tile_size) + (tile_size * (6 / 13)), - int(i / 3) * (tile_size) + (tile_size * (6 / 13)), - ), + self.preview["player_1"][j], + self.preview["player_1"][j].get_rect(center = + (int(i / 3) * (offset) + offset_side + offset_centering, + (i % 3) * (offset) + offset_side + offset_centering + ) + ) ) - if self.board.squares_preview[i + 9 * (j - 1)] == -1 * j: + if self.board.squares_preview[i + 9 * (j - 1)] == -1: self.screen.blit( - red[j], - ( - (i % 3) * (tile_size) + (tile_size * (6 / 13)), - int(i / 3) * (tile_size) + (tile_size * (6 / 13)), - ), + self.preview["player_2"][j], + self.preview["player_2"][j].get_rect(center = + (int(i / 3) * (offset) + offset_side + offset_centering, + (i % 3) * (offset) + offset_side + offset_centering + ) + ) ) - if self.render_mode == "human": - pygame.display.update() - + pygame.display.update() + elif self.render_mode == "rgb_array": observation = np.array(pygame.surfarray.pixels3d(self.screen)) - - return ( - np.transpose(observation, axes=(1, 0, 2)) - if self.render_mode == "rgb_array" - else None - ) + return ( + np.transpose(observation, axes=(1, 0, 2)) + if self.render_mode == "rgb_array" + else None + ) def close(self): if self.screen is not None: diff --git a/gobblet/game/utils.py b/gobblet/game/utils.py index ba1b053..03d105d 100644 --- a/gobblet/game/utils.py +++ b/gobblet/game/utils.py @@ -1,7 +1,7 @@ import os import pygame - +# Modified from pettingzoo.classic.connect_four def get_image(path): cwd = os.path.dirname(__file__) diff --git a/gobblet/gobblet_v0.py b/gobblet/gobblet_v1.py similarity index 100% rename from gobblet/gobblet_v0.py rename to gobblet/gobblet_v1.py diff --git a/gobblet/log/gobblet/dqn/policy.pth b/gobblet/log/gobblet/dqn/policy.pth index b56e0cb..f165802 100644 Binary files a/gobblet/log/gobblet/dqn/policy.pth and b/gobblet/log/gobblet/dqn/policy.pth differ diff --git a/pyproject.toml b/pyproject.toml index 466950e..7125b68 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "gobblet-rl" -version = "1.0.2" +version = "1.1.0" authors = [ { name="Elliot Tower", email="elliot@elliottower.com" }, ] @@ -18,14 +18,17 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies=[ - "gym>=0.26.1", - "gymnasium>=0.27.1", - "numpy>=1.23.5", - "pettingzoo>=1.22.3", + "gym==0.26.1", + "gymnasium==0.27.1", + "numpy==1.23.5", + "pettingzo==1.22.3", "setuptools>=65.6.3", - "tianshou>=0.4.11", - "torch>=1.12.1", - "pytest>=7.1.2" + "tianshou==0.4.11", + "torch==1.12.1", + "pytest==7.1.2", + "pygame==2.1.2", + "ray==2.2.0" + ] [project.urls] diff --git a/tests/test_gobblet_env.py b/tests/test_gobblet_env.py index 7492e78..bd951e1 100644 --- a/tests/test_gobblet_env.py +++ b/tests/test_gobblet_env.py @@ -2,12 +2,12 @@ import pettingzoo.test import pytest import numpy as np -from gobblet import gobblet_v0 +from gobblet import gobblet_v1 # Note: raw_env is required in order to test the board state, as env() only allows observations @pytest.fixture(scope="function") def env(): - env = gobblet_v0.raw_env() + env = gobblet_v1.raw_env() env.reset() yield env env.close() @@ -32,16 +32,16 @@ def test_api(env): def test_parallel_api(env): - env = gobblet_v0.parallel_env() + env = gobblet_v1.parallel_env() pettingzoo.test.parallel_api_test(env, num_cycles=1000) def test_seed(env): - pettingzoo.test.seed_test(gobblet_v0.env) + pettingzoo.test.seed_test(gobblet_v1.env) def test_seed_raw(env): - pettingzoo.test.seed_test(gobblet_v0.raw_env) + pettingzoo.test.seed_test(gobblet_v1.raw_env) @pytest.mark.skip( @@ -54,7 +54,7 @@ def test_max_cycles(env): # Note: this test sometimes fails due to empty possible actions list, re-run if it fails def test_performance_benchmark(env): "Run PettingZoo performance benchmark on the env" - env = gobblet_v0.env() + env = gobblet_v1.env() pettingzoo.test.performance_benchmark(env) @@ -64,7 +64,7 @@ def test_save_obs(env): def test_render(env): "Verify that render() executes without error for human-readable output" - pettingzoo.test.render_test(gobblet_v0.raw_env) + pettingzoo.test.render_test(gobblet_v1.raw_env) def test_render_human(env):