From b12f6ba6f0c0010dd68f4a6c510130b4b401657a Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Sat, 9 Mar 2024 16:46:34 +0000
Subject: [PATCH 01/23] Simplify TicTacToe reward accumulation

Rewards are only set once, only need to accumulate them once.
No need to modify the accumulated rewards if they are not set.
---
 pettingzoo/classic/tictactoe/tictactoe.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pettingzoo/classic/tictactoe/tictactoe.py b/pettingzoo/classic/tictactoe/tictactoe.py
index 45d357b6f..7ee7a2a2f 100644
--- a/pettingzoo/classic/tictactoe/tictactoe.py
+++ b/pettingzoo/classic/tictactoe/tictactoe.py
@@ -223,12 +223,11 @@ def step(self, action):
 
             # once either play wins or there is a draw, game over, both players are done
             self.terminations = {i: True for i in self.agents}
+            self._accumulate_rewards()
 
         # Switch selection to next agents
-        self._cumulative_rewards[self.agent_selection] = 0
         self.agent_selection = next_agent
 
-        self._accumulate_rewards()
         if self.render_mode == "human":
             self.render()
 

From 24f7569d33e138389364251dff27d04e1fc0d7cf Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Sat, 9 Mar 2024 17:58:35 +0000
Subject: [PATCH 02/23] Don't update TicTacToe agent on winning step

For unknown reasons, this is required for stable baselines
training to work correctly. It does not appear to impact other
usage as the agents are still looped over correctly after the
game ends - just in a different order.
---
 pettingzoo/classic/tictactoe/tictactoe.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pettingzoo/classic/tictactoe/tictactoe.py b/pettingzoo/classic/tictactoe/tictactoe.py
index 7ee7a2a2f..07bf2136a 100644
--- a/pettingzoo/classic/tictactoe/tictactoe.py
+++ b/pettingzoo/classic/tictactoe/tictactoe.py
@@ -224,9 +224,9 @@ def step(self, action):
             # once either play wins or there is a draw, game over, both players are done
             self.terminations = {i: True for i in self.agents}
             self._accumulate_rewards()
-
-        # Switch selection to next agents
-        self.agent_selection = next_agent
+        else:
+            # If no one has won, switch selection to next agents
+            self.agent_selection = next_agent
 
         if self.render_mode == "human":
             self.render()

From e3f9cc90e5b4da0868398d99d7901f1c9cbe5cd6 Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Sat, 9 Mar 2024 19:40:58 +0000
Subject: [PATCH 03/23] Move TicTacToe test for valid moves to Board

This makes the env code less cluttered and better encapsulates
the board behavior. It also expands the checks for a valid move.
---
 pettingzoo/classic/tictactoe/board.py     | 24 +++++++++++++++++++++--
 pettingzoo/classic/tictactoe/tictactoe.py |  2 --
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/pettingzoo/classic/tictactoe/board.py b/pettingzoo/classic/tictactoe/board.py
index 35186a57a..6f6dab5bd 100644
--- a/pettingzoo/classic/tictactoe/board.py
+++ b/pettingzoo/classic/tictactoe/board.py
@@ -1,3 +1,10 @@
+class BadTicTacToeMoveException(Exception):
+    """Exception raised when a bad move is made on TicTacToe board."""
+
+    def __init__(self, message="Bad TicTacToe move"):
+        super().__init__(message)
+
+
 class Board:
     def __init__(self):
         # internally self.board.squares holds a flat representation of tic tac toe board
@@ -19,9 +26,22 @@ def setup(self):
         self.calculate_winners()
 
     def play_turn(self, agent, pos):
-        # if spot is empty
+        """Place a mark by the agent in the spot given.
+
+        The following are required for a move to be valid:
+        * The agent must be a known agent (either 0 or 1).
+        * The spot must be be empty.
+        * The spot must be in the board (integer: 0 <= spot <= 8)
+
+        If any of those are not true, a BadTicTacToeMoveException
+        will be raised.
+        """
+        if pos < 0 or pos > 8:
+            raise BadTicTacToeMoveException("Invalid move location")
+        if agent != 0 and agent != 1:
+            raise BadTicTacToeMoveException("Invalid agent")
         if self.squares[pos] != 0:
-            return
+            raise BadTicTacToeMoveException("Location is not empty")
         if agent == 0:
             self.squares[pos] = 1
         elif agent == 1:
diff --git a/pettingzoo/classic/tictactoe/tictactoe.py b/pettingzoo/classic/tictactoe/tictactoe.py
index 07bf2136a..9c790f582 100644
--- a/pettingzoo/classic/tictactoe/tictactoe.py
+++ b/pettingzoo/classic/tictactoe/tictactoe.py
@@ -196,8 +196,6 @@ def step(self, action):
             or self.truncations[self.agent_selection]
         ):
             return self._was_dead_step(action)
-        # check if input action is a valid move (0 == empty spot)
-        assert self.board.squares[action] == 0, "played illegal move"
         # play turn
         self.board.play_turn(self.agents.index(self.agent_selection), action)
 

From 6708fdbdb33e4dda58730f70a7bd6ec3474014fb Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Sat, 9 Mar 2024 19:59:19 +0000
Subject: [PATCH 04/23] Hard code winning lines in TicTacToe board

These never change. There is no reason to recalculate them constantly.
---
 pettingzoo/classic/tictactoe/board.py | 38 +++++++++------------------
 1 file changed, 12 insertions(+), 26 deletions(-)

diff --git a/pettingzoo/classic/tictactoe/board.py b/pettingzoo/classic/tictactoe/board.py
index 6f6dab5bd..dd75cb794 100644
--- a/pettingzoo/classic/tictactoe/board.py
+++ b/pettingzoo/classic/tictactoe/board.py
@@ -6,6 +6,18 @@ def __init__(self, message="Bad TicTacToe move"):
 
 
 class Board:
+    # indices of the winning lines: vertical(x3), horizontal(x3), diagonal(x2)
+    winning_combinations = [
+        (0, 1, 2),
+        (3, 4, 5),
+        (6, 7, 8),
+        (0, 3, 6),
+        (1, 4, 7),
+        (2, 5, 8),
+        (0, 4, 8),
+        (2, 4, 6),
+    ]
+
     def __init__(self):
         # internally self.board.squares holds a flat representation of tic tac toe board
         # where an empty board is [0, 0, 0, 0, 0, 0, 0, 0, 0]
@@ -19,12 +31,6 @@ def __init__(self):
         # player 1 -- 2
         self.squares = [0] * 9
 
-        # precommute possible winning combinations
-        self.calculate_winners()
-
-    def setup(self):
-        self.calculate_winners()
-
     def play_turn(self, agent, pos):
         """Place a mark by the agent in the spot given.
 
@@ -48,26 +54,6 @@ def play_turn(self, agent, pos):
             self.squares[pos] = 2
         return
 
-    def calculate_winners(self):
-        winning_combinations = []
-        indices = [x for x in range(0, 9)]
-
-        # Vertical combinations
-        winning_combinations += [
-            tuple(indices[i : (i + 3)]) for i in range(0, len(indices), 3)
-        ]
-
-        # Horizontal combinations
-        winning_combinations += [
-            tuple(indices[x] for x in range(y, len(indices), 3)) for y in range(0, 3)
-        ]
-
-        # Diagonal combinations
-        winning_combinations.append(tuple(x for x in range(0, len(indices), 4)))
-        winning_combinations.append(tuple(x for x in range(2, len(indices) - 1, 2)))
-
-        self.winning_combinations = winning_combinations
-
     # returns:
     # -1 for no winner
     # 1 -- agent 0 wins

From ea15ddf97de9f565e8e170a51d445750fa0f019c Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Sat, 9 Mar 2024 20:11:20 +0000
Subject: [PATCH 05/23] Simplify win check in TicTacToe

---
 pettingzoo/classic/tictactoe/board.py | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/pettingzoo/classic/tictactoe/board.py b/pettingzoo/classic/tictactoe/board.py
index dd75cb794..082ca5197 100644
--- a/pettingzoo/classic/tictactoe/board.py
+++ b/pettingzoo/classic/tictactoe/board.py
@@ -54,21 +54,16 @@ def play_turn(self, agent, pos):
             self.squares[pos] = 2
         return
 
-    # returns:
-    # -1 for no winner
-    # 1 -- agent 0 wins
-    # 2 -- agent 1 wins
     def check_for_winner(self):
-        winner = -1
-        for combination in self.winning_combinations:
-            states = []
-            for index in combination:
-                states.append(self.squares[index])
-            if all(x == 1 for x in states):
-                winner = 1
-            if all(x == 2 for x in states):
-                winner = 2
-        return winner
+        """Return the winning player (1 or 2), or -1 if no winner."""
+        for indices in self.winning_combinations:
+            states = [self.squares[idx] for idx in indices]
+            if states == [1, 1, 1]:
+                return 1
+            if states == [2, 2, 2]:
+                return 2
+        # no winner found
+        return -1
 
     def check_game_over(self):
         winner = self.check_for_winner()

From 967caae481afc1016218028003f80af178ab796d Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Sat, 9 Mar 2024 20:38:31 +0000
Subject: [PATCH 06/23] Clean up tictactoe board functions

---
 pettingzoo/classic/tictactoe/board.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/pettingzoo/classic/tictactoe/board.py b/pettingzoo/classic/tictactoe/board.py
index 082ca5197..304be89da 100644
--- a/pettingzoo/classic/tictactoe/board.py
+++ b/pettingzoo/classic/tictactoe/board.py
@@ -48,11 +48,9 @@ def play_turn(self, agent, pos):
             raise BadTicTacToeMoveException("Invalid agent")
         if self.squares[pos] != 0:
             raise BadTicTacToeMoveException("Location is not empty")
-        if agent == 0:
-            self.squares[pos] = 1
-        elif agent == 1:
-            self.squares[pos] = 2
-        return
+
+        # agent is [0, 1]. board values are stored as [1, 2].
+        self.squares[pos] = agent + 1
 
     def check_for_winner(self):
         """Return the winning player (1 or 2), or -1 if no winner."""
@@ -68,13 +66,14 @@ def check_for_winner(self):
     def check_game_over(self):
         winner = self.check_for_winner()
 
-        if winner == -1 and all(square in [1, 2] for square in self.squares):
-            # tie
+        if winner in [1, 2]:
             return True
-        elif winner in [1, 2]:
+
+        # check for tie (all spots occupied)
+        if 0 not in self.squares:
             return True
-        else:
-            return False
+
+        return False
 
     def __str__(self):
         return str(self.squares)

From e6851a152bd4bfd77ba1d616bfebb3bfdb2ac4ba Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Mon, 11 Mar 2024 18:46:32 +0000
Subject: [PATCH 07/23] Add reset to TicTacToe board

---
 pettingzoo/classic/tictactoe/board.py     | 4 ++++
 pettingzoo/classic/tictactoe/tictactoe.py | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/pettingzoo/classic/tictactoe/board.py b/pettingzoo/classic/tictactoe/board.py
index 304be89da..f2a201ef9 100644
--- a/pettingzoo/classic/tictactoe/board.py
+++ b/pettingzoo/classic/tictactoe/board.py
@@ -31,6 +31,10 @@ def __init__(self):
         # player 1 -- 2
         self.squares = [0] * 9
 
+    def reset(self):
+        """Remove all marks from the board."""
+        self.squares = [0] * 9
+
     def play_turn(self, agent, pos):
         """Place a mark by the agent in the spot given.
 
diff --git a/pettingzoo/classic/tictactoe/tictactoe.py b/pettingzoo/classic/tictactoe/tictactoe.py
index 9c790f582..0640801cb 100644
--- a/pettingzoo/classic/tictactoe/tictactoe.py
+++ b/pettingzoo/classic/tictactoe/tictactoe.py
@@ -231,7 +231,7 @@ def step(self, action):
 
     def reset(self, seed=None, options=None):
         # reset environment
-        self.board = Board()
+        self.board.reset()
 
         self.agents = self.possible_agents[:]
         self.rewards = {i: 0 for i in self.agents}

From 42794bc194489da86487370b9151f91eb23694ed Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Mon, 11 Mar 2024 18:49:41 +0000
Subject: [PATCH 08/23] Update TicTacToe masking and observe

---
 pettingzoo/classic/tictactoe/tictactoe.py | 25 +++++++++++++----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/pettingzoo/classic/tictactoe/tictactoe.py b/pettingzoo/classic/tictactoe/tictactoe.py
index 0640801cb..e50b35538 100644
--- a/pettingzoo/classic/tictactoe/tictactoe.py
+++ b/pettingzoo/classic/tictactoe/tictactoe.py
@@ -168,27 +168,30 @@ def observe(self, agent):
         cur_player = self.possible_agents.index(agent)
         opp_player = (cur_player + 1) % 2
 
-        cur_p_board = np.equal(board_vals, cur_player + 1)
-        opp_p_board = np.equal(board_vals, opp_player + 1)
+        observation = np.empty((3, 3, 2), dtype=np.int8)
+        observation[:, :, 0] = np.equal(board_vals, cur_player + 1)
+        observation[:, :, 1] = np.equal(board_vals, opp_player + 1)
 
-        observation = np.stack([cur_p_board, opp_p_board], axis=2).astype(np.int8)
-        legal_moves = self._legal_moves() if agent == self.agent_selection else []
-
-        action_mask = np.zeros(9, "int8")
-        for i in legal_moves:
-            action_mask[i] = 1
+        action_mask = self._get_mask(agent)
 
         return {"observation": observation, "action_mask": action_mask}
 
+    def _get_mask(self, agent):
+        action_mask = np.zeros(9, dtype=np.int8)
+
+        if agent == self.agent_selection:
+            for i, value in enumerate(self.board.squares):
+                if value == 0:
+                    action_mask[i] = 1
+
+        return action_mask
+
     def observation_space(self, agent):
         return self.observation_spaces[agent]
 
     def action_space(self, agent):
         return self.action_spaces[agent]
 
-    def _legal_moves(self):
-        return [i for i in range(len(self.board.squares)) if self.board.squares[i] == 0]
-
     # action in this case is a value from 0 to 8 indicating position to move on tictactoe board
     def step(self, action):
         if (

From 2847c1752fdd31374b51a2ca83dcfa1d3e57b06e Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Mon, 11 Mar 2024 18:53:43 +0000
Subject: [PATCH 09/23] Update TicTacToe winning code

Removes the duplicate calls to check for a winner.
---
 pettingzoo/classic/tictactoe/board.py     | 38 ++++++++++++-----------
 pettingzoo/classic/tictactoe/tictactoe.py | 21 +++++--------
 2 files changed, 28 insertions(+), 31 deletions(-)

diff --git a/pettingzoo/classic/tictactoe/board.py b/pettingzoo/classic/tictactoe/board.py
index f2a201ef9..1fefba821 100644
--- a/pettingzoo/classic/tictactoe/board.py
+++ b/pettingzoo/classic/tictactoe/board.py
@@ -5,6 +5,12 @@ def __init__(self, message="Bad TicTacToe move"):
         super().__init__(message)
 
 
+TTT_PLAYER1_WIN = 0
+TTT_PLAYER2_WIN = 1
+TTT_TIE = -1
+TTT_GAME_NOT_OVER = -2
+
+
 class Board:
     # indices of the winning lines: vertical(x3), horizontal(x3), diagonal(x2)
     winning_combinations = [
@@ -31,6 +37,10 @@ def __init__(self):
         # player 1 -- 2
         self.squares = [0] * 9
 
+    @property
+    def _n_empty_squares(self):
+        return self.squares.count(0)
+
     def reset(self):
         """Remove all marks from the board."""
         self.squares = [0] * 9
@@ -56,28 +66,20 @@ def play_turn(self, agent, pos):
         # agent is [0, 1]. board values are stored as [1, 2].
         self.squares[pos] = agent + 1
 
-    def check_for_winner(self):
-        """Return the winning player (1 or 2), or -1 if no winner."""
+    def game_status(self):
+        """Return status (winner, TTT_TIE if no winner, or TTT_GAME_NOT_OVER)."""
+        # need at least 5 moves to win
+        if self._n_empty_squares > 4:
+            return TTT_GAME_NOT_OVER
         for indices in self.winning_combinations:
             states = [self.squares[idx] for idx in indices]
             if states == [1, 1, 1]:
-                return 1
+                return TTT_PLAYER1_WIN
             if states == [2, 2, 2]:
-                return 2
-        # no winner found
-        return -1
-
-    def check_game_over(self):
-        winner = self.check_for_winner()
-
-        if winner in [1, 2]:
-            return True
-
-        # check for tie (all spots occupied)
-        if 0 not in self.squares:
-            return True
-
-        return False
+                return TTT_PLAYER2_WIN
+        if self._n_empty_squares == 0:
+            return TTT_TIE
+        return TTT_GAME_NOT_OVER
 
     def __str__(self):
         return str(self.squares)
diff --git a/pettingzoo/classic/tictactoe/tictactoe.py b/pettingzoo/classic/tictactoe/tictactoe.py
index e50b35538..75729de74 100644
--- a/pettingzoo/classic/tictactoe/tictactoe.py
+++ b/pettingzoo/classic/tictactoe/tictactoe.py
@@ -79,7 +79,7 @@
 from gymnasium.utils import EzPickle
 
 from pettingzoo import AECEnv
-from pettingzoo.classic.tictactoe.board import Board
+from pettingzoo.classic.tictactoe.board import TTT_GAME_NOT_OVER, TTT_TIE, Board
 from pettingzoo.utils import agent_selector, wrappers
 
 
@@ -207,20 +207,15 @@ def step(self, action):
         # next_agent = self.agents[(self.agents.index(self.agent_selection) + 1) % len(self.agents)]
         next_agent = self._agent_selector.next()
 
-        if self.board.check_game_over():
-            winner = self.board.check_for_winner()
-
-            if winner == -1:
-                # tie
+        status = self.board.game_status()
+        if status != TTT_GAME_NOT_OVER:
+            if status == TTT_TIE:
                 pass
-            elif winner == 1:
-                # agent 0 won
-                self.rewards[self.agents[0]] += 1
-                self.rewards[self.agents[1]] -= 1
             else:
-                # agent 1 won
-                self.rewards[self.agents[1]] += 1
-                self.rewards[self.agents[0]] -= 1
+                winner = status  # either TTT_PLAYER1_WIN or TTT_PLAYER2_WIN
+                loser = winner ^ 1  # 0 -> 1; 1 -> 0
+                self.rewards[self.agents[winner]] += 1
+                self.rewards[self.agents[loser]] -= 1
 
             # once either play wins or there is a draw, game over, both players are done
             self.terminations = {i: True for i in self.agents}

From eda50333ad05521fdcc835d7d8809c94ebe0408f Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Mon, 11 Mar 2024 19:54:20 +0000
Subject: [PATCH 10/23] Minor cleanups of TicTacToe code

---
 pettingzoo/classic/tictactoe/tictactoe.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/pettingzoo/classic/tictactoe/tictactoe.py b/pettingzoo/classic/tictactoe/tictactoe.py
index 75729de74..fe96481c5 100644
--- a/pettingzoo/classic/tictactoe/tictactoe.py
+++ b/pettingzoo/classic/tictactoe/tictactoe.py
@@ -141,10 +141,10 @@ def __init__(
         self.rewards = {i: 0 for i in self.agents}
         self.terminations = {i: False for i in self.agents}
         self.truncations = {i: False for i in self.agents}
-        self.infos = {i: {"legal_moves": list(range(0, 9))} for i in self.agents}
+        self.infos = {i: {} for i in self.agents}
 
         self._agent_selector = agent_selector(self.agents)
-        self.agent_selection = self._agent_selector.reset()
+        self.agent_selection = self._agent_selector.next()
 
         self.render_mode = render_mode
         self.screen_height = screen_height
@@ -202,9 +202,6 @@ def step(self, action):
         # play turn
         self.board.play_turn(self.agents.index(self.agent_selection), action)
 
-        # update infos
-        # list of valid actions (indexes in board)
-        # next_agent = self.agents[(self.agents.index(self.agent_selection) + 1) % len(self.agents)]
         next_agent = self._agent_selector.next()
 
         status = self.board.game_status()
@@ -239,7 +236,6 @@ def reset(self, seed=None, options=None):
         self.infos = {i: {} for i in self.agents}
         # selects the first agent
         self._agent_selector.reinit(self.agents)
-        self._agent_selector.reset()
         self.agent_selection = self._agent_selector.reset()
 
         if self.screen is None:

From 364c307de3e238df705b3be9e07565012298b0de Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Mon, 11 Mar 2024 20:04:16 +0000
Subject: [PATCH 11/23] Don't create screen if not rendering in TicTacToe

---
 pettingzoo/classic/tictactoe/tictactoe.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pettingzoo/classic/tictactoe/tictactoe.py b/pettingzoo/classic/tictactoe/tictactoe.py
index fe96481c5..7f6c9d127 100644
--- a/pettingzoo/classic/tictactoe/tictactoe.py
+++ b/pettingzoo/classic/tictactoe/tictactoe.py
@@ -238,7 +238,7 @@ def reset(self, seed=None, options=None):
         self._agent_selector.reinit(self.agents)
         self.agent_selection = self._agent_selector.reset()
 
-        if self.screen is None:
+        if self.render_mode is not None and self.screen is None:
             pygame.init()
 
         if self.render_mode == "human":
@@ -246,7 +246,7 @@ def reset(self, seed=None, options=None):
                 (self.screen_height, self.screen_height)
             )
             pygame.display.set_caption("Tic-Tac-Toe")
-        else:
+        elif self.render_mode == "rgb_array":
             self.screen = pygame.Surface((self.screen_height, self.screen_height))
 
     def close(self):

From 758e6a2f4769263ae8a191de2eddcd4637660c70 Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Wed, 13 Mar 2024 20:58:56 +0000
Subject: [PATCH 12/23] Add legal_moves() to TicTacToe board

This keeps the env from needing to access the internals
of the board to get the moves available.
---
 pettingzoo/classic/tictactoe/board.py     | 3 +++
 pettingzoo/classic/tictactoe/tictactoe.py | 5 ++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/pettingzoo/classic/tictactoe/board.py b/pettingzoo/classic/tictactoe/board.py
index 1fefba821..2b7b414bf 100644
--- a/pettingzoo/classic/tictactoe/board.py
+++ b/pettingzoo/classic/tictactoe/board.py
@@ -83,3 +83,6 @@ def game_status(self):
 
     def __str__(self):
         return str(self.squares)
+
+    def legal_moves(self):
+        return [i for i, mark in enumerate(self.squares) if mark == 0]
diff --git a/pettingzoo/classic/tictactoe/tictactoe.py b/pettingzoo/classic/tictactoe/tictactoe.py
index 7f6c9d127..00c8d85c2 100644
--- a/pettingzoo/classic/tictactoe/tictactoe.py
+++ b/pettingzoo/classic/tictactoe/tictactoe.py
@@ -180,9 +180,8 @@ def _get_mask(self, agent):
         action_mask = np.zeros(9, dtype=np.int8)
 
         if agent == self.agent_selection:
-            for i, value in enumerate(self.board.squares):
-                if value == 0:
-                    action_mask[i] = 1
+            for i in self.board.legal_moves():
+                action_mask[i] = 1
 
         return action_mask
 

From 8b7ce5e626f765b164310d47badcc54667a5a111 Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Wed, 20 Mar 2024 00:34:58 +0000
Subject: [PATCH 13/23] Remove win detection short-cut in TicTacToe

This causes problems with test cases that have invalid
board configurations to test win lines.
---
 pettingzoo/classic/tictactoe/board.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/pettingzoo/classic/tictactoe/board.py b/pettingzoo/classic/tictactoe/board.py
index 2b7b414bf..a9fd2bbed 100644
--- a/pettingzoo/classic/tictactoe/board.py
+++ b/pettingzoo/classic/tictactoe/board.py
@@ -68,9 +68,6 @@ def play_turn(self, agent, pos):
 
     def game_status(self):
         """Return status (winner, TTT_TIE if no winner, or TTT_GAME_NOT_OVER)."""
-        # need at least 5 moves to win
-        if self._n_empty_squares > 4:
-            return TTT_GAME_NOT_OVER
         for indices in self.winning_combinations:
             states = [self.squares[idx] for idx in indices]
             if states == [1, 1, 1]:

From 5bebafa38b44e40c3fbcc73874798cbcc84a479b Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Wed, 20 Mar 2024 00:37:54 +0000
Subject: [PATCH 14/23] Remove unneeded variable in TicTacToe

---
 pettingzoo/classic/tictactoe/tictactoe.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/pettingzoo/classic/tictactoe/tictactoe.py b/pettingzoo/classic/tictactoe/tictactoe.py
index 00c8d85c2..bd14255a3 100644
--- a/pettingzoo/classic/tictactoe/tictactoe.py
+++ b/pettingzoo/classic/tictactoe/tictactoe.py
@@ -198,10 +198,8 @@ def step(self, action):
             or self.truncations[self.agent_selection]
         ):
             return self._was_dead_step(action)
-        # play turn
-        self.board.play_turn(self.agents.index(self.agent_selection), action)
 
-        next_agent = self._agent_selector.next()
+        self.board.play_turn(self.agents.index(self.agent_selection), action)
 
         status = self.board.game_status()
         if status != TTT_GAME_NOT_OVER:
@@ -217,8 +215,8 @@ def step(self, action):
             self.terminations = {i: True for i in self.agents}
             self._accumulate_rewards()
         else:
-            # If no one has won, switch selection to next agents
-            self.agent_selection = next_agent
+            # If the game is still going, switch selection to next agent
+            self.agent_selection = self._agent_selector.next()
 
         if self.render_mode == "human":
             self.render()

From 8e33bf74b8fe3e5aec7b5680c51e94f517e36adb Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Wed, 20 Mar 2024 01:04:17 +0000
Subject: [PATCH 15/23] Add test cases for TicTacToe board

---
 pettingzoo/classic/tictactoe/test_board.py | 126 +++++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 pettingzoo/classic/tictactoe/test_board.py

diff --git a/pettingzoo/classic/tictactoe/test_board.py b/pettingzoo/classic/tictactoe/test_board.py
new file mode 100644
index 000000000..bc05dd68a
--- /dev/null
+++ b/pettingzoo/classic/tictactoe/test_board.py
@@ -0,0 +1,126 @@
+"""Test cases for TicTacToe board."""
+
+from typing import Any
+
+import pytest
+
+from pettingzoo.classic.tictactoe.board import (  # type: ignore
+    TTT_GAME_NOT_OVER,
+    TTT_PLAYER1_WIN,
+    TTT_PLAYER2_WIN,
+    TTT_TIE,
+    BadTicTacToeMoveException,
+    Board,
+)
+
+# Note: mapping of moves to board positions are:
+# 0 3 6
+# 1 4 7
+# 2 5 8
+
+agent2_win = {
+    "moves": [
+        # agent_id, position, board after move
+        (0, 4, [0, 0, 0, 0, 1, 0, 0, 0, 0]),
+        (1, 0, [2, 0, 0, 0, 1, 0, 0, 0, 0]),
+        (0, 2, [2, 0, 1, 0, 1, 0, 0, 0, 0]),
+        (1, 6, [2, 0, 1, 0, 1, 0, 2, 0, 0]),
+        (0, 3, [2, 0, 1, 1, 1, 0, 2, 0, 0]),
+        (1, 7, [2, 0, 1, 1, 1, 0, 2, 2, 0]),
+        (0, 1, [2, 1, 1, 1, 1, 0, 2, 2, 0]),
+        (1, 8, [2, 1, 1, 1, 1, 0, 2, 2, 2]),  # agent 2 wins here
+        (0, 5, [2, 1, 1, 1, 1, 1, 2, 2, 2]),
+    ],
+    "max_step": 7,  # should not get past here
+    "winner": TTT_PLAYER2_WIN,
+}
+
+tie = {
+    "moves": [  # should be tie
+        (0, 0, [1, 0, 0, 0, 0, 0, 0, 0, 0]),
+        (1, 3, [1, 0, 0, 2, 0, 0, 0, 0, 0]),
+        (0, 1, [1, 1, 0, 2, 0, 0, 0, 0, 0]),
+        (1, 4, [1, 1, 0, 2, 2, 0, 0, 0, 0]),
+        (0, 5, [1, 1, 0, 2, 2, 1, 0, 0, 0]),
+        (1, 2, [1, 1, 2, 2, 2, 1, 0, 0, 0]),
+        (0, 6, [1, 1, 2, 2, 2, 1, 1, 0, 0]),
+        (1, 7, [1, 1, 2, 2, 2, 1, 1, 2, 0]),
+        (0, 8, [1, 1, 2, 2, 2, 1, 1, 2, 1]),
+    ],
+    "max_step": 8,
+    "winner": TTT_TIE,
+}
+
+agent1_win = {
+    "moves": [
+        (0, 0, [1, 0, 0, 0, 0, 0, 0, 0, 0]),
+        (1, 3, [1, 0, 0, 2, 0, 0, 0, 0, 0]),
+        (0, 1, [1, 1, 0, 2, 0, 0, 0, 0, 0]),
+        (1, 4, [1, 1, 0, 2, 2, 0, 0, 0, 0]),
+        (0, 2, [1, 1, 1, 2, 2, 0, 0, 0, 0]),  # agent 1 should win here
+        (1, 5, [1, 1, 1, 2, 2, 2, 0, 0, 0]),
+        (0, 6, [1, 1, 1, 2, 2, 2, 1, 0, 0]),
+        (1, 7, [1, 1, 1, 2, 2, 2, 1, 2, 0]),
+        (0, 8, [1, 1, 1, 2, 2, 2, 1, 2, 1]),
+    ],
+    "max_step": 4,
+    "winner": TTT_PLAYER1_WIN,
+}
+
+
+@pytest.mark.parametrize("values", [agent1_win, agent2_win, tie])
+def test_tictactoe_board_games(values: dict[str, Any]) -> None:
+    """Test that TicTacToe games go as expected."""
+    expected_winner = values["winner"]
+    max_step = values["max_step"]
+
+    board = Board()
+    for i, (agent, pos, board_layout) in enumerate(values["moves"]):
+        assert i <= max_step, "max step exceed in tictactoe game"
+        board.play_turn(agent, pos)
+        assert board_layout == board.squares, "wrong tictactoe layout after move"
+        status = board.game_status()
+        if status != TTT_GAME_NOT_OVER:
+            assert i == max_step, "tictactoe game ended on wrong step"
+            assert status == expected_winner, "wrong winner in tictactoe board test"
+            break
+
+
+def test_tictactoe_winning_boards() -> None:
+    """Test that winning board configurations actually win."""
+    # these are the winning lines for player 1. Note that moves
+    # for player 2 are included to make it a legal board.
+    winning_lines = [  # vertical(x3), horizontal(x3), diagonal(x2)
+        [1, 1, 1, 0, 0, 0, 0, 0, 0],
+        [0, 0, 0, 1, 1, 1, 0, 0, 0],
+        [0, 0, 0, 0, 0, 0, 1, 1, 1],
+        [1, 0, 0, 1, 0, 0, 1, 0, 0],
+        [0, 1, 0, 0, 1, 0, 0, 1, 0],
+        [0, 0, 1, 0, 0, 1, 0, 0, 1],
+        [1, 0, 0, 0, 1, 0, 0, 0, 1],
+        [0, 0, 1, 0, 1, 0, 1, 0, 0],
+    ]
+    for line in winning_lines:
+        board = Board()
+        board.squares = line
+        assert board.game_status() == TTT_PLAYER1_WIN, "Bad win check in TicTacToe"
+
+
+def test_tictactoe_bad_move() -> None:
+    """Test that illegal TicTacToe moves are rejected."""
+    board = Board()
+    # 1) move out of bounds should be rejected
+    for outside_space in [-1, 9]:
+        with pytest.raises(BadTicTacToeMoveException):
+            board.play_turn(0, outside_space)
+
+    # 2) move by unknown agent should be rejected
+    for unknown_agent in [-1, 2]:
+        with pytest.raises(BadTicTacToeMoveException):
+            board.play_turn(unknown_agent, 0)
+
+    # 3) move in occupied space by either agent should be rejected
+    board.play_turn(0, 4)  # this is fine
+    for agent in [0, 1]:
+        with pytest.raises(BadTicTacToeMoveException):
+            board.play_turn(agent, 4)  # repeating move is not valid

From df4c950814fc1801ff05e4cfaf168c9311ddecad Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Wed, 20 Mar 2024 02:40:41 +0000
Subject: [PATCH 16/23] Update TicTacToe code comments

---
 pettingzoo/classic/tictactoe/board.py     | 44 ++++++++++++++++++-----
 pettingzoo/classic/tictactoe/tictactoe.py | 17 ++++-----
 2 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/pettingzoo/classic/tictactoe/board.py b/pettingzoo/classic/tictactoe/board.py
index a9fd2bbed..230e2775a 100644
--- a/pettingzoo/classic/tictactoe/board.py
+++ b/pettingzoo/classic/tictactoe/board.py
@@ -12,6 +12,35 @@ def __init__(self, message="Bad TicTacToe move"):
 
 
 class Board:
+    """Board for a TicTacToe Game.
+
+    This tracks the position and identity of marks on the game board
+    and allows checking for a winner.
+
+    Example of usage:
+
+    import random
+    board = Board()
+
+    # random legal moves - for example purposes
+    def choose_move(board_obj: Board) -> int:
+        legal_moves = [i for i, mark in enumerate(board_obj.squares) if mark == 0]
+        return random.choice(legal_moves)
+
+    player = 0
+    while True:
+        move = choose_move(board)
+        board.play_turn(player, move)
+        status = board.game_status()
+        if status != TTT_GAME_NOT_OVER:
+            if status in [TTT_PLAYER1_WIN, TTT_PLAYER2_WIN]:
+                print(f"player {status} won")
+            else:  # status == TTT_TIE
+                print("Tie Game")
+            break
+        player = player ^ 1  # swaps between players 0 and 1
+    """
+
     # indices of the winning lines: vertical(x3), horizontal(x3), diagonal(x2)
     winning_combinations = [
         (0, 1, 2),
@@ -25,20 +54,18 @@ class Board:
     ]
 
     def __init__(self):
-        # internally self.board.squares holds a flat representation of tic tac toe board
-        # where an empty board is [0, 0, 0, 0, 0, 0, 0, 0, 0]
-        # where indexes are column wise order
+        # self.squares holds a flat representation of the tic tac toe board.
+        # an empty board is [0, 0, 0, 0, 0, 0, 0, 0, 0].
+        # player 1's squares are marked 1, while player 2's are marked 2.
+        # mapping of the flat indices to the 3x3 grid is as follows:
         # 0 3 6
         # 1 4 7
         # 2 5 8
-
-        # empty -- 0
-        # player 0 -- 1
-        # player 1 -- 2
         self.squares = [0] * 9
 
     @property
     def _n_empty_squares(self):
+        """The current number of empty squares on the board."""
         return self.squares.count(0)
 
     def reset(self):
@@ -49,7 +76,7 @@ def play_turn(self, agent, pos):
         """Place a mark by the agent in the spot given.
 
         The following are required for a move to be valid:
-        * The agent must be a known agent (either 0 or 1).
+        * The agent must be a known agent ID (either 0 or 1).
         * The spot must be be empty.
         * The spot must be in the board (integer: 0 <= spot <= 8)
 
@@ -82,4 +109,5 @@ def __str__(self):
         return str(self.squares)
 
     def legal_moves(self):
+        """Return list of legal moves (as flat indices for spaces on the board)."""
         return [i for i, mark in enumerate(self.squares) if mark == 0]
diff --git a/pettingzoo/classic/tictactoe/tictactoe.py b/pettingzoo/classic/tictactoe/tictactoe.py
index bd14255a3..e95f6ce46 100644
--- a/pettingzoo/classic/tictactoe/tictactoe.py
+++ b/pettingzoo/classic/tictactoe/tictactoe.py
@@ -84,6 +84,7 @@
 
 
 def get_image(path):
+    """Return a pygame image loaded from the given path."""
     from os import path as os_path
 
     cwd = os_path.dirname(__file__)
@@ -92,6 +93,7 @@ def get_image(path):
 
 
 def get_font(path, size):
+    """Return a pygame font loaded from the given path."""
     from os import path as os_path
 
     cwd = os_path.dirname(__file__)
@@ -153,22 +155,14 @@ def __init__(
         if self.render_mode == "human":
             self.clock = pygame.time.Clock()
 
-    # Key
-    # ----
-    # blank space = 0
-    # agent 0 = 1
-    # agent 1 = 2
-    # An observation is list of lists, where each list represents a row
-    #
-    # [[0,0,2]
-    #  [1,2,1]
-    #  [2,1,0]]
     def observe(self, agent):
         board_vals = np.array(self.board.squares).reshape(3, 3)
         cur_player = self.possible_agents.index(agent)
         opp_player = (cur_player + 1) % 2
 
         observation = np.empty((3, 3, 2), dtype=np.int8)
+        # each plane is a copy of the board that is 1 for one player's marks
+        # (plane 0: observing agent, plane 1: opponent) and 0 everywhere else.
         observation[:, :, 0] = np.equal(board_vals, cur_player + 1)
         observation[:, :, 1] = np.equal(board_vals, opp_player + 1)
 
@@ -179,6 +173,8 @@ def observe(self, agent):
     def _get_mask(self, agent):
         action_mask = np.zeros(9, dtype=np.int8)
 
+        # Per the documentation, the mask of any agent other than the
+        # currently selected one is all zeros.
         if agent == self.agent_selection:
             for i in self.board.legal_moves():
                 action_mask[i] = 1
@@ -222,7 +218,6 @@ def step(self, action):
             self.render()
 
     def reset(self, seed=None, options=None):
-        # reset environment
         self.board.reset()
 
         self.agents = self.possible_agents[:]

From 72ef62fb0c6fdac9e16e32f94564b349ddd5efa2 Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Wed, 20 Mar 2024 03:04:31 +0000
Subject: [PATCH 17/23] Bump TicTacToe environment to version 4

---
 docs/api/wrappers/pz_wrappers.md                        | 4 ++--
 pettingzoo/classic/all_modules.py                       | 4 ++--
 pettingzoo/classic/tictactoe/tictactoe.py               | 4 ++--
 pettingzoo/classic/{tictactoe_v3.py => tictactoe_v4.py} | 0
 pettingzoo/test/api_test.py                             | 4 ++--
 test/all_parameter_combs_test.py                        | 4 ++--
 tutorials/LangChain/langchain_example.py                | 4 ++--
 tutorials/SB3/test/test_sb3_action_mask.py              | 4 ++--
 tutorials/Tianshou/2_training_agents.py                 | 4 ++--
 tutorials/Tianshou/3_cli_and_logging.py                 | 4 ++--
 10 files changed, 18 insertions(+), 18 deletions(-)
 rename pettingzoo/classic/{tictactoe_v3.py => tictactoe_v4.py} (100%)

diff --git a/docs/api/wrappers/pz_wrappers.md b/docs/api/wrappers/pz_wrappers.md
index d3eb21c38..08eba7bd4 100644
--- a/docs/api/wrappers/pz_wrappers.md
+++ b/docs/api/wrappers/pz_wrappers.md
@@ -65,8 +65,8 @@ You can apply these wrappers to your environment in a similar manner to the belo
 To wrap an AEC environment:
 ```python
 from pettingzoo.utils import TerminateIllegalWrapper
-from pettingzoo.classic import tictactoe_v3
-env = tictactoe_v3.env()
+from pettingzoo.classic import tictactoe_v4
+env = tictactoe_v4.env()
 env = TerminateIllegalWrapper(env, illegal_reward=-1)
 
 env.reset()
diff --git a/pettingzoo/classic/all_modules.py b/pettingzoo/classic/all_modules.py
index e9291d15d..621d1c9d3 100644
--- a/pettingzoo/classic/all_modules.py
+++ b/pettingzoo/classic/all_modules.py
@@ -8,14 +8,14 @@
     rps_v2,
     texas_holdem_no_limit_v6,
     texas_holdem_v4,
-    tictactoe_v3,
+    tictactoe_v4,
 )
 
 classic_environments = {
     "classic/chess_v6": chess_v6,
     "classic/rps_v2": rps_v2,
     "classic/connect_four_v3": connect_four_v3,
-    "classic/tictactoe_v3": tictactoe_v3,
+    "classic/tictactoe_v4": tictactoe_v4,
     "classic/leduc_holdem_v4": leduc_holdem_v4,
     "classic/texas_holdem_v4": texas_holdem_v4,
     "classic/texas_holdem_no_limit_v6": texas_holdem_no_limit_v6,
diff --git a/pettingzoo/classic/tictactoe/tictactoe.py b/pettingzoo/classic/tictactoe/tictactoe.py
index e95f6ce46..3d4c96fe6 100644
--- a/pettingzoo/classic/tictactoe/tictactoe.py
+++ b/pettingzoo/classic/tictactoe/tictactoe.py
@@ -9,7 +9,7 @@
 
 This environment is part of the <a href='..'>classic environments</a>. Please read that page first for general information.
 
-| Import             | `from pettingzoo.classic import tictactoe_v3` |
+| Import             | `from pettingzoo.classic import tictactoe_v4` |
 |--------------------|-----------------------------------------------|
 | Actions            | Discrete                                      |
 | Parallel API       | Yes                                           |
@@ -112,7 +112,7 @@ def env(**kwargs):
 class raw_env(AECEnv, EzPickle):
     metadata = {
         "render_modes": ["human", "rgb_array"],
-        "name": "tictactoe_v3",
+        "name": "tictactoe_v4",
         "is_parallelizable": False,
         "render_fps": 1,
     }
diff --git a/pettingzoo/classic/tictactoe_v3.py b/pettingzoo/classic/tictactoe_v4.py
similarity index 100%
rename from pettingzoo/classic/tictactoe_v3.py
rename to pettingzoo/classic/tictactoe_v4.py
diff --git a/pettingzoo/test/api_test.py b/pettingzoo/test/api_test.py
index f8718579c..4b461274f 100644
--- a/pettingzoo/test/api_test.py
+++ b/pettingzoo/test/api_test.py
@@ -73,7 +73,7 @@ def action_mask():
     "go_v5",
     "chess_v6",
     "connect_four_v3",
-    "tictactoe_v3",
+    "tictactoe_v4",
     "gin_rummy_v4",
 ]
 env_graphical_obs = ["knights_archers_zombies_v10"]
@@ -96,7 +96,7 @@ def action_mask():
     "knights_archers_zombies_v10",
     "chess_v6",
     "connect_four_v3",
-    "tictactoe_v3",
+    "tictactoe_v4",
     "gin_rummy_v4",
 ]
 env_diff_agent_obs_size = [
diff --git a/test/all_parameter_combs_test.py b/test/all_parameter_combs_test.py
index cff5a7cc3..e853c498c 100644
--- a/test/all_parameter_combs_test.py
+++ b/test/all_parameter_combs_test.py
@@ -43,7 +43,7 @@
     rps_v2,
     texas_holdem_no_limit_v6,
     texas_holdem_v4,
-    tictactoe_v3,
+    tictactoe_v4,
 )
 from pettingzoo.mpe import (
     simple_adversary_v3,
@@ -104,7 +104,7 @@
     ["classic/connect_four_v3", connect_four_v3, dict()],
     ["classic/rps_v2", rps_v2, dict()],
     ["classic/chess_v6", chess_v6, dict()],
-    ["classic/tictactoe_v3", tictactoe_v3, dict()],
+    ["classic/tictactoe_v4", tictactoe_v4, dict()],
     ["classic/gin_rummy_v4", gin_rummy_v4, dict()],
     ["classic/gin_rummy_v4", gin_rummy_v4, dict(opponents_hand_visible=True)],
     ["mpe/simple_v3", simple_v3, dict(max_cycles=50)],
diff --git a/tutorials/LangChain/langchain_example.py b/tutorials/LangChain/langchain_example.py
index 5b88bfcc5..9730e0212 100644
--- a/tutorials/LangChain/langchain_example.py
+++ b/tutorials/LangChain/langchain_example.py
@@ -37,9 +37,9 @@ def rock_paper_scissors():
 
 
 def tic_tac_toe():
-    from pettingzoo.classic import tictactoe_v3
+    from pettingzoo.classic import tictactoe_v4
 
-    env = tictactoe_v3.env(render_mode="human")
+    env = tictactoe_v4.env(render_mode="human")
     agents = {
         name: ActionMaskAgent(name=name, model=ChatOpenAI(temperature=0.2), env=env)
         for name in env.possible_agents
diff --git a/tutorials/SB3/test/test_sb3_action_mask.py b/tutorials/SB3/test/test_sb3_action_mask.py
index 3835af393..b7613c023 100644
--- a/tutorials/SB3/test/test_sb3_action_mask.py
+++ b/tutorials/SB3/test/test_sb3_action_mask.py
@@ -10,7 +10,7 @@
     leduc_holdem_v4,
     texas_holdem_no_limit_v6,
     texas_holdem_v4,
-    tictactoe_v3,
+    tictactoe_v4,
 )
 
 pytest.importorskip("stable_baselines3")
@@ -30,7 +30,7 @@
 MEDIUM_ENVS = [
     leduc_holdem_v4,  # with 10x as many steps it gets higher total rewards (9 vs -9), 0.52 winrate, and 0.92 vs 0.83 total scores
     hanabi_v5,  # even with 10x as many steps, total score seems to always be tied between the two agents
-    tictactoe_v3,  # even with 10x as many steps, agent still loses every time (most likely an error somewhere)
+    tictactoe_v4,  # even with 10x as many steps, agent still loses every time (most likely an error somewhere)
     chess_v6,  # difficult to train because games take so long, performance varies heavily
 ]
 
diff --git a/tutorials/Tianshou/2_training_agents.py b/tutorials/Tianshou/2_training_agents.py
index 30ae1b159..19395b80a 100644
--- a/tutorials/Tianshou/2_training_agents.py
+++ b/tutorials/Tianshou/2_training_agents.py
@@ -22,7 +22,7 @@
 from tianshou.trainer import offpolicy_trainer
 from tianshou.utils.net.common import Net
 
-from pettingzoo.classic import tictactoe_v3
+from pettingzoo.classic import tictactoe_v4
 
 
 def _get_agents(
@@ -64,7 +64,7 @@ def _get_agents(
 
 def _get_env():
     """This function is needed to provide callables for DummyVectorEnv."""
-    return PettingZooEnv(tictactoe_v3.env())
+    return PettingZooEnv(tictactoe_v4.env())
 
 
 if __name__ == "__main__":
diff --git a/tutorials/Tianshou/3_cli_and_logging.py b/tutorials/Tianshou/3_cli_and_logging.py
index b11abe574..9526ec68e 100644
--- a/tutorials/Tianshou/3_cli_and_logging.py
+++ b/tutorials/Tianshou/3_cli_and_logging.py
@@ -26,7 +26,7 @@
 from tianshou.utils.net.common import Net
 from torch.utils.tensorboard import SummaryWriter
 
-from pettingzoo.classic import tictactoe_v3
+from pettingzoo.classic import tictactoe_v4
 
 
 def get_parser() -> argparse.ArgumentParser:
@@ -146,7 +146,7 @@ def get_agents(
 
 
 def get_env(render_mode=None):
-    return PettingZooEnv(tictactoe_v3.env(render_mode=render_mode))
+    return PettingZooEnv(tictactoe_v4.env(render_mode=render_mode))
 
 
 def train_agent(

From e4bd2282521a8f027b4af5de0175c52075c10538 Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Wed, 20 Mar 2024 04:26:34 +0000
Subject: [PATCH 18/23] Add __future__ annotations to TicTacToe tests

---
 pettingzoo/classic/tictactoe/test_board.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pettingzoo/classic/tictactoe/test_board.py b/pettingzoo/classic/tictactoe/test_board.py
index bc05dd68a..64e6e3482 100644
--- a/pettingzoo/classic/tictactoe/test_board.py
+++ b/pettingzoo/classic/tictactoe/test_board.py
@@ -1,5 +1,7 @@
 """Test cases for TicTacToe board."""
 
+from __future__ import annotations
+
 from typing import Any
 
 import pytest

From 1f95299b7ada8ce4a2421d8e4820a18b2d5f2eb2 Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Fri, 22 Mar 2024 14:52:23 +0000
Subject: [PATCH 19/23] Change TicTacToe from medium to easy in SB3 test.

---
 tutorials/SB3/test/test_sb3_action_mask.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tutorials/SB3/test/test_sb3_action_mask.py b/tutorials/SB3/test/test_sb3_action_mask.py
index b7613c023..255e77436 100644
--- a/tutorials/SB3/test/test_sb3_action_mask.py
+++ b/tutorials/SB3/test/test_sb3_action_mask.py
@@ -24,13 +24,13 @@
     gin_rummy_v4,
     texas_holdem_no_limit_v6,  # texas holdem human rendered game ends instantly, but with random actions it works fine
     texas_holdem_v4,
+    tictactoe_v4,
 ]
 
 # More difficult environments which will likely take more training time
 MEDIUM_ENVS = [
     leduc_holdem_v4,  # with 10x as many steps it gets higher total rewards (9 vs -9), 0.52 winrate, and 0.92 vs 0.83 total scores
     hanabi_v5,  # even with 10x as many steps, total score seems to always be tied between the two agents
-    tictactoe_v4,  # even with 10x as many steps, agent still loses every time (most likely an error somewhere)
     chess_v6,  # difficult to train because games take so long, performance varies heavily
 ]
 
@@ -50,8 +50,10 @@ def test_action_mask_easy(env_fn):
 
     env_kwargs = {}
 
-    # Leduc Hold`em takes slightly longer to outperform random
-    steps = 8192 if env_fn != leduc_holdem_v4 else 8192 * 4
+    steps = 8192
+    # These take slightly longer to outperform random
+    if env_fn in [leduc_holdem_v4, tictactoe_v4]:
+        steps *= 4
 
     # Train a model against itself (takes ~2 minutes on GPU)
     train_action_mask(env_fn, steps=steps, seed=0, **env_kwargs)
@@ -92,7 +94,7 @@ def test_action_mask_medium(env_fn):
 
     assert (
         winrate < 0.75
-    ), "Policy should not perform better than 75% winrate"  # 30-40% for leduc, 0% for hanabi, 0% for tic-tac-toe
+    ), "Policy should not perform better than 75% winrate"  # 30-40% for leduc, 0% for hanabi
 
     # Watch two games (disabled by default)
     # eval_action_mask(env_fn, num_games=2, render_mode="human", **env_kwargs)

From 39de4954d783dc4a7bf3187603b26a8ba0607cf0 Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Fri, 22 Mar 2024 15:22:51 +0000
Subject: [PATCH 20/23] Replace TicTacToe exceptions with asserts

---
 pettingzoo/classic/tictactoe/board.py      | 19 ++++---------------
 pettingzoo/classic/tictactoe/test_board.py |  7 +++----
 2 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/pettingzoo/classic/tictactoe/board.py b/pettingzoo/classic/tictactoe/board.py
index 230e2775a..e6fee6853 100644
--- a/pettingzoo/classic/tictactoe/board.py
+++ b/pettingzoo/classic/tictactoe/board.py
@@ -1,10 +1,3 @@
-class BadTicTacToeMoveException(Exception):
-    """Exception raised when a bad move is made on TicTacToe board."""
-
-    def __init__(self, message="Bad TicTacToe move"):
-        super().__init__(message)
-
-
 TTT_PLAYER1_WIN = 0
 TTT_PLAYER2_WIN = 1
 TTT_TIE = -1
@@ -80,15 +73,11 @@ def play_turn(self, agent, pos):
         * The spot must be be empty.
         * The spot must be in the board (integer: 0 <= spot <= 8)
 
-        If any of those are not true, a BadTicTacToeMoveException
-        will be raised.
+        If any of those are not true, an assertion will fail.
         """
-        if pos < 0 or pos > 8:
-            raise BadTicTacToeMoveException("Invalid move location")
-        if agent != 0 and agent != 1:
-            raise BadTicTacToeMoveException("Invalid agent")
-        if self.squares[pos] != 0:
-            raise BadTicTacToeMoveException("Location is not empty")
+        assert pos >= 0 and pos <= 8, "Invalid move location"
+        assert agent in [0, 1], "Invalid agent"
+        assert self.squares[pos] == 0, "Location is not empty"
 
         # agent is [0, 1]. board values are stored as [1, 2].
         self.squares[pos] = agent + 1
diff --git a/pettingzoo/classic/tictactoe/test_board.py b/pettingzoo/classic/tictactoe/test_board.py
index 64e6e3482..248ce4175 100644
--- a/pettingzoo/classic/tictactoe/test_board.py
+++ b/pettingzoo/classic/tictactoe/test_board.py
@@ -11,7 +11,6 @@
     TTT_PLAYER1_WIN,
     TTT_PLAYER2_WIN,
     TTT_TIE,
-    BadTicTacToeMoveException,
     Board,
 )
 
@@ -113,16 +112,16 @@ def test_tictactoe_bad_move() -> None:
     board = Board()
     # 1) move out of bounds should be rejected
     for outside_space in [-1, 9]:
-        with pytest.raises(BadTicTacToeMoveException):
+        with pytest.raises(AssertionError):
             board.play_turn(0, outside_space)
 
     # 2) move by unknown agent should be rejected
     for unknown_agent in [-1, 2]:
-        with pytest.raises(BadTicTacToeMoveException):
+        with pytest.raises(AssertionError):
             board.play_turn(unknown_agent, 0)
 
     # 3) move in occupied space by either agent should be rejected
     board.play_turn(0, 4)  # this is fine
     for agent in [0, 1]:
-        with pytest.raises(BadTicTacToeMoveException):
+        with pytest.raises(AssertionError):
             board.play_turn(agent, 4)  # repeating move is not valid

From 6336393cdf2af095c1180cf5da7bcd85d68b3914 Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Fri, 22 Mar 2024 18:04:23 +0000
Subject: [PATCH 21/23] Check messages of assert errors in tictactoe test

---
 pettingzoo/classic/tictactoe/test_board.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pettingzoo/classic/tictactoe/test_board.py b/pettingzoo/classic/tictactoe/test_board.py
index 248ce4175..b8f7e9248 100644
--- a/pettingzoo/classic/tictactoe/test_board.py
+++ b/pettingzoo/classic/tictactoe/test_board.py
@@ -112,16 +112,16 @@ def test_tictactoe_bad_move() -> None:
     board = Board()
     # 1) move out of bounds should be rejected
     for outside_space in [-1, 9]:
-        with pytest.raises(AssertionError):
+        with pytest.raises(AssertionError, match="Invalid move location"):
             board.play_turn(0, outside_space)
 
     # 2) move by unknown agent should be rejected
     for unknown_agent in [-1, 2]:
-        with pytest.raises(AssertionError):
+        with pytest.raises(AssertionError, match="Invalid agent"):
             board.play_turn(unknown_agent, 0)
 
     # 3) move in occupied space by either agent should be rejected
     board.play_turn(0, 4)  # this is fine
     for agent in [0, 1]:
-        with pytest.raises(AssertionError):
+        with pytest.raises(AssertionError, match="Location is not empty"):
             board.play_turn(agent, 4)  # repeating move is not valid

From 71ca217339c8709fca4bb103f093c848feca1ad7 Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Fri, 3 May 2024 16:17:22 +0000
Subject: [PATCH 22/23] Fix agent swap in TicTacToe

This now always switches agents in each step. The previous change was
itself a bug: it only appeared to "fix" the behaviour because of a bug in
the SB3 tutorial.

The agent should be swapped every step as is done now.
---
 pettingzoo/classic/tictactoe/tictactoe.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pettingzoo/classic/tictactoe/tictactoe.py b/pettingzoo/classic/tictactoe/tictactoe.py
index 3d4c96fe6..cd3a5c1ab 100644
--- a/pettingzoo/classic/tictactoe/tictactoe.py
+++ b/pettingzoo/classic/tictactoe/tictactoe.py
@@ -210,9 +210,8 @@ def step(self, action):
             # once either play wins or there is a draw, game over, both players are done
             self.terminations = {i: True for i in self.agents}
             self._accumulate_rewards()
-        else:
-            # If the game is still going, switch selection to next agent
-            self.agent_selection = self._agent_selector.next()
+
+        self.agent_selection = self._agent_selector.next()
 
         if self.render_mode == "human":
             self.render()

From 4170f2b3dd711a7c0b29876d8f01e946dac49ecd Mon Sep 17 00:00:00 2001
From: David Ackerman <145808634+dm-ackerman@users.noreply.github.com>
Date: Fri, 3 May 2024 16:27:59 +0000
Subject: [PATCH 23/23] revert TicTacToe version to 3

The bump to v4 was made because of the change to agent handling at the
end of an episode, which has since been undone.

The remaining changes don't alter the behaviour of the env, so it is
left at v3.
---
 docs/api/wrappers/pz_wrappers.md                        | 4 ++--
 pettingzoo/classic/all_modules.py                       | 4 ++--
 pettingzoo/classic/tictactoe/tictactoe.py               | 4 ++--
 pettingzoo/classic/{tictactoe_v4.py => tictactoe_v3.py} | 0
 pettingzoo/test/api_test.py                             | 4 ++--
 test/all_parameter_combs_test.py                        | 4 ++--
 tutorials/LangChain/langchain_example.py                | 4 ++--
 tutorials/SB3/test/test_sb3_action_mask.py              | 6 +++---
 tutorials/Tianshou/2_training_agents.py                 | 4 ++--
 tutorials/Tianshou/3_cli_and_logging.py                 | 4 ++--
 10 files changed, 19 insertions(+), 19 deletions(-)
 rename pettingzoo/classic/{tictactoe_v4.py => tictactoe_v3.py} (100%)

diff --git a/docs/api/wrappers/pz_wrappers.md b/docs/api/wrappers/pz_wrappers.md
index 08eba7bd4..d3eb21c38 100644
--- a/docs/api/wrappers/pz_wrappers.md
+++ b/docs/api/wrappers/pz_wrappers.md
@@ -65,8 +65,8 @@ You can apply these wrappers to your environment in a similar manner to the belo
 To wrap an AEC environment:
 ```python
 from pettingzoo.utils import TerminateIllegalWrapper
-from pettingzoo.classic import tictactoe_v4
-env = tictactoe_v4.env()
+from pettingzoo.classic import tictactoe_v3
+env = tictactoe_v3.env()
 env = TerminateIllegalWrapper(env, illegal_reward=-1)
 
 env.reset()
diff --git a/pettingzoo/classic/all_modules.py b/pettingzoo/classic/all_modules.py
index 621d1c9d3..e9291d15d 100644
--- a/pettingzoo/classic/all_modules.py
+++ b/pettingzoo/classic/all_modules.py
@@ -8,14 +8,14 @@
     rps_v2,
     texas_holdem_no_limit_v6,
     texas_holdem_v4,
-    tictactoe_v4,
+    tictactoe_v3,
 )
 
 classic_environments = {
     "classic/chess_v6": chess_v6,
     "classic/rps_v2": rps_v2,
     "classic/connect_four_v3": connect_four_v3,
-    "classic/tictactoe_v4": tictactoe_v4,
+    "classic/tictactoe_v3": tictactoe_v3,
     "classic/leduc_holdem_v4": leduc_holdem_v4,
     "classic/texas_holdem_v4": texas_holdem_v4,
     "classic/texas_holdem_no_limit_v6": texas_holdem_no_limit_v6,
diff --git a/pettingzoo/classic/tictactoe/tictactoe.py b/pettingzoo/classic/tictactoe/tictactoe.py
index cd3a5c1ab..7f93442f4 100644
--- a/pettingzoo/classic/tictactoe/tictactoe.py
+++ b/pettingzoo/classic/tictactoe/tictactoe.py
@@ -9,7 +9,7 @@
 
 This environment is part of the <a href='..'>classic environments</a>. Please read that page first for general information.
 
-| Import             | `from pettingzoo.classic import tictactoe_v4` |
+| Import             | `from pettingzoo.classic import tictactoe_v3` |
 |--------------------|-----------------------------------------------|
 | Actions            | Discrete                                      |
 | Parallel API       | Yes                                           |
@@ -112,7 +112,7 @@ def env(**kwargs):
 class raw_env(AECEnv, EzPickle):
     metadata = {
         "render_modes": ["human", "rgb_array"],
-        "name": "tictactoe_v4",
+        "name": "tictactoe_v3",
         "is_parallelizable": False,
         "render_fps": 1,
     }
diff --git a/pettingzoo/classic/tictactoe_v4.py b/pettingzoo/classic/tictactoe_v3.py
similarity index 100%
rename from pettingzoo/classic/tictactoe_v4.py
rename to pettingzoo/classic/tictactoe_v3.py
diff --git a/pettingzoo/test/api_test.py b/pettingzoo/test/api_test.py
index 4b461274f..f8718579c 100644
--- a/pettingzoo/test/api_test.py
+++ b/pettingzoo/test/api_test.py
@@ -73,7 +73,7 @@ def action_mask():
     "go_v5",
     "chess_v6",
     "connect_four_v3",
-    "tictactoe_v4",
+    "tictactoe_v3",
     "gin_rummy_v4",
 ]
 env_graphical_obs = ["knights_archers_zombies_v10"]
@@ -96,7 +96,7 @@ def action_mask():
     "knights_archers_zombies_v10",
     "chess_v6",
     "connect_four_v3",
-    "tictactoe_v4",
+    "tictactoe_v3",
     "gin_rummy_v4",
 ]
 env_diff_agent_obs_size = [
diff --git a/test/all_parameter_combs_test.py b/test/all_parameter_combs_test.py
index e853c498c..cff5a7cc3 100644
--- a/test/all_parameter_combs_test.py
+++ b/test/all_parameter_combs_test.py
@@ -43,7 +43,7 @@
     rps_v2,
     texas_holdem_no_limit_v6,
     texas_holdem_v4,
-    tictactoe_v4,
+    tictactoe_v3,
 )
 from pettingzoo.mpe import (
     simple_adversary_v3,
@@ -104,7 +104,7 @@
     ["classic/connect_four_v3", connect_four_v3, dict()],
     ["classic/rps_v2", rps_v2, dict()],
     ["classic/chess_v6", chess_v6, dict()],
-    ["classic/tictactoe_v4", tictactoe_v4, dict()],
+    ["classic/tictactoe_v3", tictactoe_v3, dict()],
     ["classic/gin_rummy_v4", gin_rummy_v4, dict()],
     ["classic/gin_rummy_v4", gin_rummy_v4, dict(opponents_hand_visible=True)],
     ["mpe/simple_v3", simple_v3, dict(max_cycles=50)],
diff --git a/tutorials/LangChain/langchain_example.py b/tutorials/LangChain/langchain_example.py
index 9730e0212..5b88bfcc5 100644
--- a/tutorials/LangChain/langchain_example.py
+++ b/tutorials/LangChain/langchain_example.py
@@ -37,9 +37,9 @@ def rock_paper_scissors():
 
 
 def tic_tac_toe():
-    from pettingzoo.classic import tictactoe_v4
+    from pettingzoo.classic import tictactoe_v3
 
-    env = tictactoe_v4.env(render_mode="human")
+    env = tictactoe_v3.env(render_mode="human")
     agents = {
         name: ActionMaskAgent(name=name, model=ChatOpenAI(temperature=0.2), env=env)
         for name in env.possible_agents
diff --git a/tutorials/SB3/test/test_sb3_action_mask.py b/tutorials/SB3/test/test_sb3_action_mask.py
index 255e77436..43b564d17 100644
--- a/tutorials/SB3/test/test_sb3_action_mask.py
+++ b/tutorials/SB3/test/test_sb3_action_mask.py
@@ -10,7 +10,7 @@
     leduc_holdem_v4,
     texas_holdem_no_limit_v6,
     texas_holdem_v4,
-    tictactoe_v4,
+    tictactoe_v3,
 )
 
 pytest.importorskip("stable_baselines3")
@@ -24,7 +24,7 @@
     gin_rummy_v4,
     texas_holdem_no_limit_v6,  # texas holdem human rendered game ends instantly, but with random actions it works fine
     texas_holdem_v4,
-    tictactoe_v4,
+    tictactoe_v3,
 ]
 
 # More difficult environments which will likely take more training time
@@ -52,7 +52,7 @@ def test_action_mask_easy(env_fn):
 
     steps = 8192
     # These take slightly longer to outperform random
-    if env_fn in [leduc_holdem_v4, tictactoe_v4]:
+    if env_fn in [leduc_holdem_v4, tictactoe_v3]:
         steps *= 4
 
     # Train a model against itself (takes ~2 minutes on GPU)
diff --git a/tutorials/Tianshou/2_training_agents.py b/tutorials/Tianshou/2_training_agents.py
index 19395b80a..30ae1b159 100644
--- a/tutorials/Tianshou/2_training_agents.py
+++ b/tutorials/Tianshou/2_training_agents.py
@@ -22,7 +22,7 @@
 from tianshou.trainer import offpolicy_trainer
 from tianshou.utils.net.common import Net
 
-from pettingzoo.classic import tictactoe_v4
+from pettingzoo.classic import tictactoe_v3
 
 
 def _get_agents(
@@ -64,7 +64,7 @@ def _get_agents(
 
 def _get_env():
     """This function is needed to provide callables for DummyVectorEnv."""
-    return PettingZooEnv(tictactoe_v4.env())
+    return PettingZooEnv(tictactoe_v3.env())
 
 
 if __name__ == "__main__":
diff --git a/tutorials/Tianshou/3_cli_and_logging.py b/tutorials/Tianshou/3_cli_and_logging.py
index 9526ec68e..b11abe574 100644
--- a/tutorials/Tianshou/3_cli_and_logging.py
+++ b/tutorials/Tianshou/3_cli_and_logging.py
@@ -26,7 +26,7 @@
 from tianshou.utils.net.common import Net
 from torch.utils.tensorboard import SummaryWriter
 
-from pettingzoo.classic import tictactoe_v4
+from pettingzoo.classic import tictactoe_v3
 
 
 def get_parser() -> argparse.ArgumentParser:
@@ -146,7 +146,7 @@ def get_agents(
 
 
 def get_env(render_mode=None):
-    return PettingZooEnv(tictactoe_v4.env(render_mode=render_mode))
+    return PettingZooEnv(tictactoe_v3.env(render_mode=render_mode))
 
 
 def train_agent(