From 0cc226cbab4f1d0efef980a3134b1ec6e97f07c8 Mon Sep 17 00:00:00 2001 From: chucklqsun Date: Sat, 27 Jan 2018 22:31:45 +1100 Subject: [PATCH] fix treasure_on_right choose_action bug --- .../1_command_line_reinforcement_learning/treasure_on_right.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contents/1_command_line_reinforcement_learning/treasure_on_right.py b/contents/1_command_line_reinforcement_learning/treasure_on_right.py index cc1e6f1..364ec34 100644 --- a/contents/1_command_line_reinforcement_learning/treasure_on_right.py +++ b/contents/1_command_line_reinforcement_learning/treasure_on_right.py @@ -34,7 +34,7 @@ def build_q_table(n_states, actions): def choose_action(state, q_table): # This is how to choose an action state_actions = q_table.iloc[state, :] - if (np.random.uniform() > EPSILON) or (state_actions.all() == 0): # act non-greedy or state-action have no value + if (np.random.uniform() > EPSILON) or (not state_actions.any()): # act non-greedy or state-action have no value action_name = np.random.choice(ACTIONS) else: # act greedy action_name = state_actions.idxmax() # replace argmax to idxmax as argmax means a different function in newer version of pandas