Skip to content

Commit

Permalink
Update the TicTacToe environment (#1192)
Browse files Browse the repository at this point in the history
Co-authored-by: Elliot Tower <[email protected]>
  • Loading branch information
dm-ackerman and elliottower authored May 3, 2024
1 parent 38e2520 commit 98e8c20
Show file tree
Hide file tree
Showing 4 changed files with 251 additions and 119 deletions.
155 changes: 89 additions & 66 deletions pettingzoo/classic/tictactoe/board.py
Original file line number Diff line number Diff line change
@@ -1,79 +1,102 @@
TTT_PLAYER1_WIN = 0
TTT_PLAYER2_WIN = 1
TTT_TIE = -1
TTT_GAME_NOT_OVER = -2


class Board:
"""Board for a TicTacToe Game.
This tracks the position and identity of marks on the game board
and allows checking for a winner.
Example of usage:
import random
board = Board()
# random legal moves - for example purposes
def choose_move(board_obj: Board) -> int:
legal_moves = [i for i, mark in enumerate(board_obj.squares) if mark == 0]
return random.choice(legal_moves)
player = 0
while True:
move = choose_move(board)
board.play_turn(player, move)
status = board.game_status()
if status != TTT_GAME_NOT_OVER:
if status in [TTT_PLAYER1_WIN, TTT_PLAYER2_WIN]:
print(f"player {status} won")
else: # status == TTT_TIE
print("Tie Game")
break
player = player ^ 1 # swaps between players 0 and 1
"""

# indices of the winning lines: vertical(x3), horizontal(x3), diagonal(x2)
winning_combinations = [
(0, 1, 2),
(3, 4, 5),
(6, 7, 8),
(0, 3, 6),
(1, 4, 7),
(2, 5, 8),
(0, 4, 8),
(2, 4, 6),
]

def __init__(self):
# internally self.board.squares holds a flat representation of tic tac toe board
# where an empty board is [0, 0, 0, 0, 0, 0, 0, 0, 0]
# where indexes are column wise order
# self.squares holds a flat representation of the tic tac toe board.
# an empty board is [0, 0, 0, 0, 0, 0, 0, 0, 0].
# player 1's squares are marked 1, while player 2's are marked 2.
# mapping of the flat indices to the 3x3 grid is as follows:
# 0 3 6
# 1 4 7
# 2 5 8

# empty -- 0
# player 0 -- 1
# player 1 -- 2
self.squares = [0] * 9

# precommute possible winning combinations
self.calculate_winners()
@property
def _n_empty_squares(self):
"""The current number of empty squares on the board."""
return self.squares.count(0)

def setup(self):
self.calculate_winners()
def reset(self):
"""Remove all marks from the board."""
self.squares = [0] * 9

def play_turn(self, agent, pos):
# if spot is empty
if self.squares[pos] != 0:
return
if agent == 0:
self.squares[pos] = 1
elif agent == 1:
self.squares[pos] = 2
return

def calculate_winners(self):
winning_combinations = []
indices = [x for x in range(0, 9)]

# Vertical combinations
winning_combinations += [
tuple(indices[i : (i + 3)]) for i in range(0, len(indices), 3)
]

# Horizontal combinations
winning_combinations += [
tuple(indices[x] for x in range(y, len(indices), 3)) for y in range(0, 3)
]

# Diagonal combinations
winning_combinations.append(tuple(x for x in range(0, len(indices), 4)))
winning_combinations.append(tuple(x for x in range(2, len(indices) - 1, 2)))

self.winning_combinations = winning_combinations

# returns:
# -1 for no winner
# 1 -- agent 0 wins
# 2 -- agent 1 wins
def check_for_winner(self):
winner = -1
for combination in self.winning_combinations:
states = []
for index in combination:
states.append(self.squares[index])
if all(x == 1 for x in states):
winner = 1
if all(x == 2 for x in states):
winner = 2
return winner

def check_game_over(self):
winner = self.check_for_winner()

if winner == -1 and all(square in [1, 2] for square in self.squares):
# tie
return True
elif winner in [1, 2]:
return True
else:
return False
"""Place a mark by the agent in the spot given.
The following are required for a move to be valid:
* The agent must be a known agent ID (either 0 or 1).
* The spot must be be empty.
* The spot must be in the board (integer: 0 <= spot <= 8)
If any of those are not true, an assertion will fail.
"""
assert pos >= 0 and pos <= 8, "Invalid move location"
assert agent in [0, 1], "Invalid agent"
assert self.squares[pos] == 0, "Location is not empty"

# agent is [0, 1]. board values are stored as [1, 2].
self.squares[pos] = agent + 1

def game_status(self):
"""Return status (winner, TTT_TIE if no winner, or TTT_GAME_NOT_OVER)."""
for indices in self.winning_combinations:
states = [self.squares[idx] for idx in indices]
if states == [1, 1, 1]:
return TTT_PLAYER1_WIN
if states == [2, 2, 2]:
return TTT_PLAYER2_WIN
if self._n_empty_squares == 0:
return TTT_TIE
return TTT_GAME_NOT_OVER

def __str__(self):
return str(self.squares)

def legal_moves(self):
"""Return list of legal moves (as flat indices for spaces on the board)."""
return [i for i, mark in enumerate(self.squares) if mark == 0]
127 changes: 127 additions & 0 deletions pettingzoo/classic/tictactoe/test_board.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
"""Test cases for TicTacToe board."""

from __future__ import annotations

from typing import Any

import pytest

from pettingzoo.classic.tictactoe.board import ( # type: ignore
TTT_GAME_NOT_OVER,
TTT_PLAYER1_WIN,
TTT_PLAYER2_WIN,
TTT_TIE,
Board,
)

# Note: mapping of moves to board positions are:
# 0 3 6
# 1 4 7
# 2 5 8

agent2_win = {
"moves": [
# agent_id, position, board after move
(0, 4, [0, 0, 0, 0, 1, 0, 0, 0, 0]),
(1, 0, [2, 0, 0, 0, 1, 0, 0, 0, 0]),
(0, 2, [2, 0, 1, 0, 1, 0, 0, 0, 0]),
(1, 6, [2, 0, 1, 0, 1, 0, 2, 0, 0]),
(0, 3, [2, 0, 1, 1, 1, 0, 2, 0, 0]),
(1, 7, [2, 0, 1, 1, 1, 0, 2, 2, 0]),
(0, 1, [2, 1, 1, 1, 1, 0, 2, 2, 0]),
(1, 8, [2, 1, 1, 1, 1, 0, 2, 2, 2]), # agent 2 wins here
(0, 5, [2, 1, 1, 1, 1, 1, 2, 2, 2]),
],
"max_step": 7, # should not get past here
"winner": TTT_PLAYER2_WIN,
}

tie = {
"moves": [ # should be tie
(0, 0, [1, 0, 0, 0, 0, 0, 0, 0, 0]),
(1, 3, [1, 0, 0, 2, 0, 0, 0, 0, 0]),
(0, 1, [1, 1, 0, 2, 0, 0, 0, 0, 0]),
(1, 4, [1, 1, 0, 2, 2, 0, 0, 0, 0]),
(0, 5, [1, 1, 0, 2, 2, 1, 0, 0, 0]),
(1, 2, [1, 1, 2, 2, 2, 1, 0, 0, 0]),
(0, 6, [1, 1, 2, 2, 2, 1, 1, 0, 0]),
(1, 7, [1, 1, 2, 2, 2, 1, 1, 2, 0]),
(0, 8, [1, 1, 2, 2, 2, 1, 1, 2, 1]),
],
"max_step": 8,
"winner": TTT_TIE,
}

agent1_win = {
"moves": [
(0, 0, [1, 0, 0, 0, 0, 0, 0, 0, 0]),
(1, 3, [1, 0, 0, 2, 0, 0, 0, 0, 0]),
(0, 1, [1, 1, 0, 2, 0, 0, 0, 0, 0]),
(1, 4, [1, 1, 0, 2, 2, 0, 0, 0, 0]),
(0, 2, [1, 1, 1, 2, 2, 0, 0, 0, 0]), # agent 1 should win here
(1, 5, [1, 1, 1, 2, 2, 2, 0, 0, 0]),
(0, 6, [1, 1, 1, 2, 2, 2, 1, 0, 0]),
(1, 7, [1, 1, 1, 2, 2, 2, 1, 2, 0]),
(0, 8, [1, 1, 1, 2, 2, 2, 1, 2, 1]),
],
"max_step": 4,
"winner": TTT_PLAYER1_WIN,
}


@pytest.mark.parametrize("values", [agent1_win, agent2_win, tie])
def test_tictactoe_board_games(values: dict[str, Any]) -> None:
"""Test that TicTacToe games go as expected."""
expected_winner = values["winner"]
max_step = values["max_step"]

board = Board()
for i, (agent, pos, board_layout) in enumerate(values["moves"]):
assert i <= max_step, "max step exceed in tictactoe game"
board.play_turn(agent, pos)
assert board_layout == board.squares, "wrong tictactoe layout after move"
status = board.game_status()
if status != TTT_GAME_NOT_OVER:
assert i == max_step, "tictactoe game ended on wrong step"
assert status == expected_winner, "wrong winner in tictactoe board test"
break


def test_tictactoe_winning_boards() -> None:
"""Test that winning board configurations actually win."""
# these are the winning lines for player 1. Note that moves
# for player 2 are included to make it a legal board.
winning_lines = [ # vertical(x3), horizontal(x3), diagonal(x2)
[1, 1, 1, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 1, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 1, 1, 1],
[1, 0, 0, 1, 0, 0, 1, 0, 0],
[0, 1, 0, 0, 1, 0, 0, 1, 0],
[0, 0, 1, 0, 0, 1, 0, 0, 1],
[1, 0, 0, 0, 1, 0, 0, 0, 1],
[0, 0, 1, 0, 1, 0, 1, 0, 0],
]
for line in winning_lines:
board = Board()
board.squares = line
assert board.game_status() == TTT_PLAYER1_WIN, "Bad win check in TicTacToe"


def test_tictactoe_bad_move() -> None:
"""Test that illegal TicTacToe moves are rejected."""
board = Board()
# 1) move out of bounds should be rejected
for outside_space in [-1, 9]:
with pytest.raises(AssertionError, match="Invalid move location"):
board.play_turn(0, outside_space)

# 2) move by unknown agent should be rejected
for unknown_agent in [-1, 2]:
with pytest.raises(AssertionError, match="Invalid agent"):
board.play_turn(unknown_agent, 0)

# 3) move in occupied space by either agent should be rejected
board.play_turn(0, 4) # this is fine
for agent in [0, 1]:
with pytest.raises(AssertionError, match="Location is not empty"):
board.play_turn(agent, 4) # repeating move is not valid
Loading

0 comments on commit 98e8c20

Please sign in to comment.