From f677ef2b1492dae43164e24992e22de76e3fb226 Mon Sep 17 00:00:00 2001
From: chucklqsun <bartowski.sun@gmail.com>
Date: Sun, 28 Jan 2018 02:17:08 +1100
Subject: [PATCH] more intuitive solution

---
 .../1_command_line_reinforcement_learning/treasure_on_right.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contents/1_command_line_reinforcement_learning/treasure_on_right.py b/contents/1_command_line_reinforcement_learning/treasure_on_right.py
index 364ec34..ec3ab02 100644
--- a/contents/1_command_line_reinforcement_learning/treasure_on_right.py
+++ b/contents/1_command_line_reinforcement_learning/treasure_on_right.py
@@ -34,7 +34,7 @@ def build_q_table(n_states, actions):
 def choose_action(state, q_table):
     # This is how to choose an action
     state_actions = q_table.iloc[state, :]
-    if (np.random.uniform() > EPSILON) or (not state_actions.any()):  # act non-greedy or state-action have no value
+    if (np.random.uniform() > EPSILON) or ((state_actions == 0).all()):  # act non-greedy, or all action values for this state are still zero
         action_name = np.random.choice(ACTIONS)
     else:   # act greedy
         action_name = state_actions.idxmax()    # replace argmax to idxmax as argmax means a different function in newer version of pandas