diff --git a/agent/CdqnAgent.py b/agent/CdqnAgent.py
index 08560e1..e735fc0 100644
--- a/agent/CdqnAgent.py
+++ b/agent/CdqnAgent.py
@@ -8,9 +8,10 @@ class CdqnModel:
-    stateSize = 10 # [monteCarlo, playerCount, remainChips, investChips, pot, toCall, oneHotRound0, oneHotRound1, oneHotRound2, oneHotRound3]
-    actionTrain = {0: 'FOLD', 1: 'CHECK', 2: 'RAISE*1', 3: 'RAISE*2', 4: 'RAISE*4', 5: 'RAISE*8', \
-                   6: 'RAISE*16', 7: 'RAISE*32', 8: 'RAISE*48', 9: 'RAISE*64', 10: 'RAISE*100'}
+    stateSize = 11 # [monteCarlo, playerCount, remainChips, investChips, pot, toCall, oneHotRound0, oneHotRound1, oneHotRound2, oneHotRound3, oneHotRound4]
+    # actionTrain = {0: 'FOLD', 1: 'CHECK', 2: 'RAISE*1', 3: 'RAISE*2', 4: 'RAISE*4', 5: 'RAISE*8', \
+    #                6: 'RAISE*16', 7: 'RAISE*32', 8: 'RAISE*48', 9: 'RAISE*64', 10: 'RAISE*100'}
+    actionTrain = {0: 'FOLD', 1: 'CHECK', 2: 'RAISE*1', 3: 'RAISE*2'}
 
     def __init__(self, model_name_prefix="test", deep_q=None):
         # self.reload_left = 2
@@ -131,13 +132,16 @@ def _transAction(self, minBet, actionID):
 
     def _toOneHotRound(self, round):
         if round == 0:
-            return [1, 0, 0, 0]
+            return [1, 0, 0, 0, 0]
         elif round == 1:
-            return [0, 1, 0, 0]
+            return [0, 1, 0, 0, 0]
         elif round == 2:
-            return [0, 0, 1, 0]
+            return [0, 0, 1, 0, 0]
+        elif round == 3:
+            return [0, 0, 0, 1, 0]
         else:
-            return [0, 0, 0, 1]
+            return [0, 0, 0, 0, 1]
+
 
 # observation, actionID, reward, newObservation, done
 # observation:
diff --git a/shared_dqn_example.py b/shared_dqn_example.py
index 5efe9ed..9233bdd 100644
--- a/shared_dqn_example.py
+++ b/shared_dqn_example.py
@@ -15,8 +15,6 @@ def lets_play(env, n_seats, model_list):
         if env.episode_end:
             break
-
-
 
     while not cycle_terminal:
         actions = holdem.model_list_action(cur_state=cur_state, n_seats=n_seats, model_list=model_list)
@@ -59,19 +57,19 @@ def lets_play(env, n_seats, model_list):
 model_list.append(agent.CdqnModel(model_prefix_name, shared_deep_q))
 env.add_player(5, stack=3000)
-model_list.append(agent.CdqnModel(model_prefix_name, shared_deep_q))
+model_list.append(agent.WinRateGambler())
 env.add_player(6, stack=3000)
-model_list.append(agent.CdqnModel(model_prefix_name, shared_deep_q))
+model_list.append(agent.WinRateGambler())
 env.add_player(7, stack=3000)
-model_list.append(agent.CdqnModel(model_prefix_name, shared_deep_q))
+model_list.append(agent.WinRateGambler())
 env.add_player(8, stack=3000)
-model_list.append(agent.CdqnModel(model_prefix_name, shared_deep_q))
+model_list.append(agent.WinRateGambler())
 env.add_player(9, stack=3000)
-model_list.append(agent.CdqnModel(model_prefix_name, shared_deep_q))
+model_list.append(agent.WinRateGambler())
 
 # play out a hand
 try: