Merge pull request wenkesj#10 from kiwi-chang/chihshun/cdqn

Fix a bug where the game continued even when only one player remained
unaeat · Jul 26, 2018 · 874326b · 874326b
2 parents 740d506 + e108392
commit 874326b
Show file tree

Hide file tree

Showing 3 changed files with 17 additions and 15 deletions.
diff --git a/agent/CdqnAgent.py b/agent/CdqnAgent.py
@@ -8,9 +8,10 @@
 
 
 class CdqnModel:
-    stateSize = 10  # [monteCarlo, playerCount, remainChips, investChips, pot, toCall, oneHotRound0, oneHotRound1, oneHotRound2, oneHotRound3]
-    actionTrain = {0: 'FOLD', 1: 'CHECK', 2: 'RAISE*1', 3: 'RAISE*2', 4: 'RAISE*4', 5: 'RAISE*8', \
-                   6: 'RAISE*16', 7: 'RAISE*32', 8: 'RAISE*48', 9: 'RAISE*64', 10: 'RAISE*100'}
+    stateSize = 11  # [monteCarlo, playerCount, remainChips, investChips, pot, toCall, oneHotRound0, oneHotRound1, oneHotRound2, oneHotRound3, oneHotRound4]
+    # actionTrain = {0: 'FOLD', 1: 'CHECK', 2: 'RAISE*1', 3: 'RAISE*2', 4: 'RAISE*4', 5: 'RAISE*8', \
+    #                6: 'RAISE*16', 7: 'RAISE*32', 8: 'RAISE*48', 9: 'RAISE*64', 10: 'RAISE*100'}
+    actionTrain = {0: 'FOLD', 1: 'CHECK', 2: 'RAISE*1', 3: 'RAISE*2'}
 
     def __init__(self, model_name_prefix="test", deep_q=None):
         # self.reload_left = 2
@@ -131,13 +132,16 @@ def _transAction(self, minBet, actionID):
 
     def _toOneHotRound(self, round):
         if round == 0:
-            return [1, 0, 0, 0]
+            return [1, 0, 0, 0, 0]
         elif round == 1:
-            return [0, 1, 0, 0]
+            return [0, 1, 0, 0, 0]
         elif round == 2:
-            return [0, 0, 1, 0]
+            return [0, 0, 1, 0, 0]
+        elif round == 3:
+            return [0, 0, 0, 1, 0]
         else:
-            return [0, 0, 0, 1]
+            return [0, 0, 0, 0, 1]
+
 
 # observation, actionID, reward, newObservation, done
 # observation: 

diff --git a/holdem/env.py b/holdem/env.py
@@ -264,7 +264,7 @@ def step(self, actions):
             players.remove(folded_player)
             self._folded_players.append(folded_player)
 
-        if len([p for p in players if not p.isallin]) < 1:
+        if len([p for p in players if not p.isallin]) < 1 or len(players) <= 1:
             while self._round < 4:
                 self._resolve(players)
         elif self._current_player.playedthisround:

diff --git a/shared_dqn_example.py b/shared_dqn_example.py
@@ -15,8 +15,6 @@ def lets_play(env, n_seats, model_list):
             if env.episode_end:
                 break
 
-
-
             while not cycle_terminal:
                 actions = holdem.model_list_action(cur_state=cur_state, n_seats=n_seats, model_list=model_list)
 
@@ -59,19 +57,19 @@ def lets_play(env, n_seats, model_list):
 model_list.append(agent.CdqnModel(model_prefix_name, shared_deep_q))
 
 env.add_player(5, stack=3000)
-model_list.append(agent.CdqnModel(model_prefix_name, shared_deep_q))
+model_list.append(agent.WinRateGambler())
 
 env.add_player(6, stack=3000)
-model_list.append(agent.CdqnModel(model_prefix_name, shared_deep_q))
+model_list.append(agent.WinRateGambler())
 
 env.add_player(7, stack=3000)
-model_list.append(agent.CdqnModel(model_prefix_name, shared_deep_q))
+model_list.append(agent.WinRateGambler())
 
 env.add_player(8, stack=3000)
-model_list.append(agent.CdqnModel(model_prefix_name, shared_deep_q))
+model_list.append(agent.WinRateGambler())
 
 env.add_player(9, stack=3000)
-model_list.append(agent.CdqnModel(model_prefix_name, shared_deep_q))
+model_list.append(agent.WinRateGambler())
 
 # play out a hand
 try: