Commit

added comments on agent.py
leoperez55 committed May 2, 2023
1 parent 0e899e0 commit c8c85b9
Showing 4 changed files with 38 additions and 26 deletions.
Binary file modified Game/__pycache__/agent_based_game.cpython-310.pyc
Binary file not shown.
50 changes: 28 additions & 22 deletions Game/agent.py
@@ -1,16 +1,10 @@
from agent_based_game import Game, Action
import random
import time
import pandas as pd

class QLearning:
def __init__(self,
game,
alpha = 0.5,
gamma = 0.99,
epsilon_start = 0.6,
epsilon_end = 0.01,
epsilon_decay = 0.003,
episodes = 1000):
def __init__(self, game, alpha = 0.5, gamma = 0.99, epsilon_start = 0.6, epsilon_end = 0.01, epsilon_decay = 0.003, episodes = 100):
self.game = game
self.alpha = alpha
self.gamma = gamma
@@ -30,45 +24,57 @@ def _initializeQTable(self):
for state in self.state_space:
for action in self.action_space:
self.Q[(state, action)] = 0


def printQTable(self):
print(
pd.DataFrame(
[
[self.Q[(state, action)] for state in self.state_space]
for action in self.action_space
],
index=self.action_space,
columns=self.state_space,
)
)


def train(self):
# Training loop
epsilon = self.epsilon_start
for episode in range(self.episodes):
# visualize every 10 episodes
# if (episode) % 100 == 0:
# game.reset(visualizeNext=True)
# else:
# game.reset()
if (episode) % 10 == 0:
#game.reset(visualizeNext=True)
self.printQTable()
else:
game.reset()
self.game.reset(visualizeNext=False)
gameover = False
state, _ = self.game.getState()
total_reward = 0

while not gameover:
# Epsilon-greedy action selection
if random.uniform(0, 1) < epsilon:
if random.uniform(0, 1) < epsilon: #If True, the agent chooses a random action from the action space
action = random.choice(self.action_space)
else:
action = max(self.action_space, key=lambda a: self.Q[(state, a)])
action = max(self.action_space, key=lambda a: self.Q[(state, a)]) #selects the action with the highest Q-value for the current state.

# Perform the action and receive the new state, reward, and gameover status
next_state, reward, gameover, _ = self.game.act(action)
total_reward += reward
total_reward += reward #Every time the snake takes a positive action (i.e. moves closer to a target or eats one), the reward increases

# Update the Q-value for the current state-action pair using the Q-learning update rule
old_q_value = self.Q[(state, action)]
next_q_value = max([self.Q[(next_state, a)] for a in self.action_space])
self.Q[(state, action)] = old_q_value + self.alpha * (reward + self.gamma * next_q_value - old_q_value)
state = next_state
next_q_value = max([self.Q[(next_state, a)] for a in self.action_space]) #Finds the Q-value of the best move the snake can make from the next state
self.Q[(state, action)] = old_q_value + self.alpha * (reward + self.gamma * next_q_value - old_q_value) ### Bellman equation to calculate the new Q-value ###
state = next_state #Updates the 'state' variable to the state the snake is in after taking one action (moving)

print(f"Episode {episode + 1}/{self.episodes} completed")
# print(f"Total Reward (Train): {total_reward}")
# print Q-table
# for state in state_space:
# for action in action_space:
# print(f"Q[{state}, {action}]: {Q[(state, action)]}")
#print Q-table
#self.printQTable()

# Decay epsilon to balance exploration and exploitation
epsilon = max(self.epsilon_end, epsilon * self.epsilon_decay)
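
For reference, the following is a minimal standalone sketch of the epsilon-greedy selection and Bellman update that the train() loop above applies. The names (q_table, actions, choose_action, q_update) are illustrative placeholders, not identifiers from this repository.

import random

def choose_action(q_table, actions, state, epsilon):
    # Epsilon-greedy: explore with probability epsilon, otherwise exploit the best known action
    if random.uniform(0, 1) < epsilon:
        return random.choice(actions)
    return max(actions, key=lambda a: q_table[(state, a)])

def q_update(q_table, actions, state, action, reward, next_state, alpha=0.5, gamma=0.99):
    # Bellman update: nudge the old estimate toward reward + gamma * (best value of the next state)
    old_q = q_table[(state, action)]
    best_next_q = max(q_table[(next_state, a)] for a in actions)
    q_table[(state, action)] = old_q + alpha * (reward + gamma * best_next_q - old_q)

One note on the schedule above: epsilon * epsilon_decay multiplies epsilon by 0.003 each episode, so it reaches epsilon_end after the first episode; if a gradual decay is intended, a schedule such as epsilon * (1 - epsilon_decay) is a common alternative.
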
13 changes: 9 additions & 4 deletions Game/agent_based_game.py
@@ -14,7 +14,7 @@ class Action(Enum):
DOWN_RIGHT = 8

class Game:
def __init__(self, runtime=15, fps=60, target_reward=100, miss_reward=-1, visualize=False):
def __init__(self, runtime=30, fps=60, target_reward=100, miss_reward=-1, visualize=False):
# Initialize Pygame after __main__ is executed
self.initialized = False
self._initializeGame() #Sets the 'initialized' bool to True, initializes the pygame modules, and names the display window "game"
@@ -33,7 +33,7 @@ def __init__(self, runtime=15, fps=60, target_reward=100, miss_reward=-1, visual
self.MISS_REWARD = miss_reward #set to -1 from parameters
self.PLAYER_DIMENTIONS = (80, 80) #The size of the snake head
self.TARGET_DIMENTIONS = (40, 40) #Size of the targets
self.TARGET_NUMBER = 3 #Number of targets on the screen at once
self.TARGET_NUMBER = 1 #Number of targets on the screen at once
self.SPEED = 5
self.GAME_DURATION = runtime #How long one game lasts, in seconds (set by the runtime parameter)
self.GAME_DURATION_IN_FRAMES = runtime * fps #Total number of frames in one game (game duration in seconds times frames per second)
@@ -151,7 +151,12 @@ def reset(self, visualizeNext=False):
self.player.moveTo(self.SCREEN_WIDTH//2, self.SCREEN_HEIGHT//2)


#Runs after every move the snake makes
#This function runs after every move the snake makes
#This function moves the snake according to the action that is passed in
#This function detects whether a collision occurred; if so, it adds the reward and generates new targets
#This function also gives a negative reward if no target was eaten during that action
#This function also returns the state of the game (where the target is in relation to the snake)
#This function also visualizes the game and checks whether the game is over
def act(self, action: Action):
for event in pygame.event.get(): #Looks through the pygame event queue and checks whether any event is QUIT (clicking X to close the window)
if event.type == QUIT:
@@ -306,7 +311,7 @@ def getScore(self):
action = random.choice(list(Action)) #Makes the agent pick a random action from the Action enum (i.e. Up, Down, Left, Right, ...) and stores it in the action variable

state, reward, gameover, score = game.act(action) #Then call game.act(action) to perform the action
#game.act(action) returns a tuple (state, reward, gameover, score)
#game.act(action) returns a tuple (current state, reward, gameover, score)

totalReward += reward #Then you can do whatever you want with the returned values

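For orientation, here is how the random-agent snippet at the bottom of this file's diff fits together as a complete loop. This is a sketch assuming the interface shown in this diff (Game(runtime=..., visualize=...), reset(visualizeNext=...), and act(action) returning (state, reward, gameover, score)), not code taken verbatim from the repository.

import random
from agent_based_game import Game, Action

game = Game(runtime=15, visualize=False)
game.reset(visualizeNext=False)
gameover = False
totalReward = 0

while not gameover:
    action = random.choice(list(Action))                # pick a random action from the Action enum
    state, reward, gameover, score = game.act(action)   # advance the game by one step
    totalReward += reward

print(f"Total reward from random play: {totalReward}")
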
1 change: 1 addition & 0 deletions ML-Q-learning-Project
Submodule ML-Q-learning-Project added at 0e899e
