Commit

added comments on agent.py
leoperez55 committed May 2, 2023
1 parent 0e899e0 commit c8c85b9
Showing 4 changed files with 38 additions and 26 deletions.
Binary file modified Game/__pycache__/agent_based_game.cpython-310.pyc
Binary file not shown.
50 changes: 28 additions & 22 deletions Game/agent.py
@@ -1,16 +1,10 @@
from agent_based_game import Game, Action
import random
import time
import pandas as pd

class QLearning:
def __init__(self,
game,
alpha = 0.5,
gamma = 0.99,
epsilon_start = 0.6,
epsilon_end = 0.01,
epsilon_decay = 0.003,
episodes = 1000):
def __init__(self, game, alpha = 0.5, gamma = 0.99, epsilon_start = 0.6, epsilon_end = 0.01, epsilon_decay = 0.003, episodes = 100):
self.game = game
self.alpha = alpha
self.gamma = gamma
@@ -30,45 +24,57 @@ def _initializeQTable(self):
for state in self.state_space:
for action in self.action_space:
self.Q[(state, action)] = 0


def printQTable(self):
print(
pd.DataFrame(
[
[self.Q[(state, action)] for state in self.state_space]
for action in self.action_space
],
index=self.action_space,
columns=self.state_space,
)
)


def train(self):
# Training loop
epsilon = self.epsilon_start
for episode in range(self.episodes):
# visualize every 10 episodes
# if (episode) % 100 == 0:
# game.reset(visualizeNext=True)
# else:
# game.reset()
if (episode) % 10 == 0:
#game.reset(visualizeNext=True)
self.printQTable()
else:
game.reset()
self.game.reset(visualizeNext=False)
gameover = False
state, _ = self.game.getState()
total_reward = 0

while not gameover:
# Epsilon-greedy action selection
if random.uniform(0, 1) < epsilon:
if random.uniform(0, 1) < epsilon: #If True, the agent chooses a random action from the action space
action = random.choice(self.action_space)
else:
action = max(self.action_space, key=lambda a: self.Q[(state, a)])
action = max(self.action_space, key=lambda a: self.Q[(state, a)]) #selects the action with the highest Q-value for the current state.

# Perform the action and receive the new state, reward, and gameover status
next_state, reward, gameover, _ = self.game.act(action)
total_reward += reward
total_reward += reward #Every time the snake takes a positive action (i.e. moves closer to a target or eats one), the reward increases

# Update the Q-value for the current state-action pair using the Q-learning update rule
old_q_value = self.Q[(state, action)]
next_q_value = max([self.Q[(next_state, a)] for a in self.action_space])
self.Q[(state, action)] = old_q_value + self.alpha * (reward + self.gamma * next_q_value - old_q_value)
state = next_state
next_q_value = max([self.Q[(next_state, a)] for a in self.action_space]) #Finds the Q-value of the best move the snake can make from the next state
self.Q[(state, action)] = old_q_value + self.alpha * (reward + self.gamma * next_q_value - old_q_value) ### Bellman equation to calculate the new Q-value ###
state = next_state #Updates the 'state' variable to the state the snake is in after taking one action (moving)

print(f"Episode {episode + 1}/{self.episodes} completed")
# print(f"Total Reward (Train): {total_reward}")
# print Q-table
# for state in state_space:
# for action in action_space:
# print(f"Q[{state}, {action}]: {Q[(state, action)]}")
#print Q-table
#self.printQTable()

# Decay epsilon to balance exploration and exploitation
epsilon = max(self.epsilon_end, epsilon * self.epsilon_decay)
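
For reference, the following is a minimal standalone sketch of the epsilon-greedy selection and Bellman update that the train() loop above applies. The names (q_table, actions, choose_action, q_update) are illustrative placeholders, not identifiers from this repository.

import random

def choose_action(q_table, actions, state, epsilon):
    # Epsilon-greedy: explore with probability epsilon, otherwise exploit the best known action
    if random.uniform(0, 1) < epsilon:
        return random.choice(actions)
    return max(actions, key=lambda a: q_table[(state, a)])

def q_update(q_table, actions, state, action, reward, next_state, alpha=0.5, gamma=0.99):
    # Bellman update: nudge the old estimate toward reward + gamma * (best value of the next state)
    old_q = q_table[(state, action)]
    best_next_q = max(q_table[(next_state, a)] for a in actions)
    q_table[(state, action)] = old_q + alpha * (reward + gamma * best_next_q - old_q)

One note on the schedule above: epsilon * epsilon_decay multiplies epsilon by 0.003 each episode, so it reaches epsilon_end after the first episode; if a gradual decay is intended, a schedule such as epsilon * (1 - epsilon_decay) is a common alternative.
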
13 changes: 9 additions & 4 deletions Game/agent_based_game.py
@@ -14,7 +14,7 @@ class Action(Enum):
DOWN_RIGHT = 8

class Game:
def __init__(self, runtime=15, fps=60, target_reward=100, miss_reward=-1, visualize=False):
def __init__(self, runtime=30, fps=60, target_reward=100, miss_reward=-1, visualize=False):
# Initialize Pygame after __main__ is executed
self.initialized = False
self._initializeGame() #Sets the 'initialized' bool to True, initializes the pygame modules, and names the display window "game"
@@ -33,7 +33,7 @@ def __init__(self, runtime=15, fps=60, target_reward=100, miss_reward=-1, visual
self.MISS_REWARD = miss_reward #set to -1 from parameters
self.PLAYER_DIMENTIONS = (80, 80) #The size of the snake head
self.TARGET_DIMENTIONS = (40, 40) #Size of the targets
self.TARGET_NUMBER = 3 #Number of targets on the screen at once
self.TARGET_NUMBER = 1 #Number of targets on the screen at once
self.SPEED = 5
self.GAME_DURATION = runtime #How long one game lasts, in seconds (set by the runtime parameter)
self.GAME_DURATION_IN_FRAMES = runtime * fps #Total number of frames in one game (game duration in seconds times frames per second)
@@ -151,7 +151,12 @@ def reset(self, visualizeNext=False):
self.player.moveTo(self.SCREEN_WIDTH//2, self.SCREEN_HEIGHT//2)


#Runs after every move the snake makes
#This function runs after every move the snake makes
#This function moves the snake according to the action that is passed in
#This function detects whether a collision occurred; if so, it adds the reward and generates new targets
#This function also gives a negative reward if no target was eaten during that action
#This function also returns the state of the game (where the target is in relation to the snake)
#This function also visualizes the game and checks whether the game is over
def act(self, action: Action):
for event in pygame.event.get(): #Looks through the pygame event queue and checks whether any event is QUIT (clicking X to close the window)
if event.type == QUIT:
@@ -306,7 +311,7 @@ def getScore(self):
action = random.choice(list(Action)) #Makes the agent pick a random action from the Action enum (i.e. Up, Down, Left, Right, ...) and stores it in the action variable

state, reward, gameover, score = game.act(action) #Then call game.act(action) to perform the action
#game.act(action) returns a tuple (state, reward, gameover, score)
#game.act(action) returns a tuple (current state, reward, gameover, score)

totalReward += reward #Then you can do whatever you want with the returned values

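For orientation, here is how the random-agent snippet at the bottom of this file's diff fits together as a complete loop. This is a sketch assuming the interface shown in this diff (Game(runtime=..., visualize=...), reset(visualizeNext=...), and act(action) returning (state, reward, gameover, score)), not code taken verbatim from the repository.

import random
from agent_based_game import Game, Action

game = Game(runtime=15, visualize=False)
game.reset(visualizeNext=False)
gameover = False
totalReward = 0

while not gameover:
    action = random.choice(list(Action))                # pick a random action from the Action enum
    state, reward, gameover, score = game.act(action)   # advance the game by one step
    totalReward += reward

print(f"Total reward from random play: {totalReward}")
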
1 change: 1 addition & 0 deletions ML-Q-learning-Project
Submodule ML-Q-learning-Project added at 0e899e
