# learning.py
import time
import random
import copy
import numpy as np
import pandas as pd
from anytree import Node, RenderTree
from anytree.dotexport import RenderTreeGraph
from test2p2 import deal, lookup_hand
from constants import FLOP, TURN, RIVER, BET_33, BET_66, BET_50, BET_100, FOLD, RAISE, CALL, CHECK, CARDS, CARDS_REVERSE
# Design notes:
# Give it ranges, or let it work out what to open on its own? -> not feasible.
# Post-flop:
# Calculate equity of hand and range. Keep an ordered list of all equities and bet 1-a%, or let it work it out?
#   -> obviously let it work it out.
# We're at some flop A92ss, 7 turn, 2sss river.
# OOP, we take actions randomly, and villain also acts randomly, for 1,000 hands.
# Then we start learning for 10,000 hands, saving each time <-- print every 100.
# Bet sizes: 1/3, 1/2, 2/3; 100bb.
# Villain action?
# Villain 3-bet calling range, BTN vs BB: QQ-22, A2s+, K9s+, Q9s+, J9s+, T8s+, 97s+, 86s+, 75s+, 65s, 54s, ATo+, KTo+, QTo+, JTo
# TODO: To test regrets, make the opponent strategy "always call", with the probability of a call increasing as the
#       bet size gets smaller, to see whether it finds the optimal size.
# Human-readable label for each action id; used to name tree nodes and to
# print the line of play.
actions_to_read = {
    3: 'BET_33',
    5: 'BET_50',
    7: 'BET_66',
    11: 'BET_100',
    13: 'FOLD',
    17: 'RAISE',
    19: 'CALL',
    23: 'CHECK',
}
def profile(func):
    """No-op stand-in for the ``@profile`` decorator of rkern/line_profiler.

    Returns ``func`` itself, so decorated callables behave exactly as if they
    were undecorated.
    """
    return func
class PokerGame:
    """Simulate one hero-vs-villain post-flop hand by random playout.

    ``simulate_round`` picks a random legal hero action for a street, applies a
    scripted villain response, records the action as a node of an anytree tree
    and recurses into the next street until the river, an all-in or a fold.
    Utilities are the hero's net chips relative to ``STARTING_STACK``.

    ``game_state`` is a plain dict with the keys ``pot_size``, ``betting_lead``,
    ``hero_stack``, ``last_bet``, ``raised``, ``reraised``, ``folded``,
    ``villain_folded`` and ``line`` (list of action ids taken so far).
    """

    # Stack the hero is assumed to have started the hand with; utilities are
    # measured against it.
    STARTING_STACK = 100

    # Fraction of the pot wagered for each bet-size action id.
    _BET_FRACTIONS = {BET_33: 0.3333, BET_50: 0.50, BET_66: 0.6666, BET_100: 1.00}

    def __init__(self, AI_obj, hero_hand, villain_hand, villain_range, board=None, betting_lead=False, initial_pot=20,
                 hero_stack=100 - 10):
        """Store both hands and the optional starting board.

        ``AI_obj``, ``villain_range``, ``betting_lead`` and ``initial_pot`` are
        accepted but currently unused.
        """
        # self.AI = AI_obj
        self.hand_h = hero_hand
        self.hand_v = villain_hand
        # self.range = villain_range
        self.game_state = None
        self.board = board
        self.villain_stack = hero_stack
        self.actions_taken = []
        self.strategy = ""
        self.tree = Node('Root')

    @profile
    def set_board(self, current_board):
        """Complete ``current_board`` to five cards and store it in ``self.board``.

        With an empty/None ``current_board`` a whole board is dealt. Cards are
        dealt excluding both hands and the cards already on ``current_board``.
        """
        hole_cards = self.hand_h + self.hand_v
        if not current_board:
            self.board = deal(hole_cards)
            return
        missing = 5 - len(current_board)
        if missing > 0:
            # Exclude only cards that are really in play. Using the previously
            # stored board here would fail when it is None and would wrongly
            # remove the last hand's turn/river from the deck.
            self.board = current_board + deal(hole_cards + current_board, missing)
        else:
            self.board = list(current_board)

    def get_opponent_response(self, game_state):
        """Apply the scripted villain reaction to ``game_state`` in place.

        Facing no bet the villain leads for a third of the pot (capped at the
        hero's stack). Facing a bet the villain calls 75% of the time; otherwise
        it raises, or just calls if the hero has already re-raised.
        """
        rng = random.random()
        if game_state['last_bet'] == 0:
            # random.random() is always < 1, so this threshold disables the
            # "villain checks" branch: the villain always bets here.
            if rng <= 99999:
                # BET_33 is an action id, not a fraction; look the fraction up.
                bet_amount = self._BET_FRACTIONS[BET_33] * game_state['pot_size']
                if bet_amount >= game_state['hero_stack']:
                    # all in >= needed?
                    bet_amount = game_state['hero_stack']
                    # TODO: Go to showdown directly
                game_state['betting_lead'] = False
                game_state['pot_size'] += bet_amount
                game_state['last_bet'] = bet_amount
        elif rng > 0.25 or game_state['reraised']:
            # villain calls the bet (or calls the hero's re-raise)
            game_state['pot_size'] += game_state['last_bet']
        else:
            # villain raises
            # NOTE(review): 'last_bet' is not updated to the raise size, so the
            # hero's follow-up call only matches the original bet.
            game_state['raised'] = True
            game_state['pot_size'] += 3 * game_state['last_bet']

    def get_actions(self):
        """Return the list of action ids the hero may take in the current state."""
        state = self.game_state
        if not state['betting_lead']:
            return [CHECK]
        if state['last_bet'] == 0:
            return [BET_33, BET_50, BET_66, BET_100, CHECK]
        if not state['reraised']:
            return [FOLD, RAISE, CALL]
        return [FOLD, CALL]

    def set_node(self, game_round, action, parent_node, extend=False):
        """Return the child of ``parent_node`` for ``action``, creating it if needed.

        Flop actions are named by their label alone; later streets and
        ``extend`` nodes are named ``<parent name>:<label>``.
        """
        label = actions_to_read[action]
        if game_round == FLOP and not extend:
            node_id = label
        else:
            node_id = str(parent_node.name) + ":" + label
        for existing_node in parent_node.children:
            if existing_node.name == node_id:
                return existing_node
        return Node(node_id, parent=parent_node)

    def action_node(self, game_round, initial_state, action, parent_node):
        """Play ``action`` from ``initial_state`` and return the hero's utility.

        Works on a shallow copy of ``initial_state`` (the ``line`` list is
        therefore shared), lets the villain respond, auto-calls any villain
        bet or raise, then either scores the hand or recurses into the next
        street.
        """
        self.game_state = initial_state.copy()
        node = self.set_node(game_round, action, parent_node)
        self.take_action(action)
        self.get_opponent_response(self.game_state)
        state = self.game_state
        # Needs changing: the hero always calls a villain raise or lead.
        if state['raised'] or (state['last_bet'] != 0 and not state['betting_lead']):
            self.take_action(CALL)
            node = self.set_node(game_round, CALL, node, extend=True)
        if state['hero_stack'] == 0 or game_round == RIVER:
            utility = self.showdown(game_round)
        elif state['villain_folded']:
            utility = self._net_if_won()
        elif state['folded']:
            utility = self._net_if_lost()
        else:
            utility = self.simulate_round(game_round + 1, state, node)
        # TODO: store the utility on the node (average of the children,
        # eventually weighted by the opponent strategy), then run
        # compute_regret / update_strategy at the end of the iteration.
        return utility

    def compute_regret(self, action_node):
        """Record each sibling's regret against the best sibling utility.

        Writes ``regret`` and accumulates ``regretSum`` on every child of
        ``action_node.parent``.
        NOTE(review): relies on nodes offering get_value/set_value - confirm
        the Node class in use provides them.
        """
        siblings = action_node.parent.children
        # Take the true maximum; seeding with 0 is wrong when every utility
        # is negative.
        max_utility = max(node.get_value() for node in siblings)
        for node in siblings:
            regret = node.get_value() - max_utility
            node.set_value("regret", regret)
            node.set_value("regretSum", (node.get_value("regretSum") or 0) + regret)

    def compute_all_regret(self):
        """Build key-value structure for every possible line and its utility (pair: depends on won or lost, maybe not
        needed if just using the size of the pot and work out net later)
        Won or lost calculated once using current board if on the river else using % equity
        Iterate through every (other) possible action and update node regret value based on chosen action's utility"""
        pass

    def update_strategy(self, parent_node):
        """Set ``parent_node``'s ``strategy`` by regret matching over its children.

        Each child's weight is its positive ``regretSum`` divided by the total;
        if no child has positive regret the weights are uniform. Does nothing
        for a node without children.
        """
        children = parent_node.children
        if not children:
            return
        # A child with no recorded regret yet counts as zero.
        positive = [max(child.get_value("regretSum") or 0, 0) for child in children]
        normalizing_sum = sum(positive)
        if normalizing_sum > 0:
            weights = [value / float(normalizing_sum) for value in positive]
        else:
            # equal strategies
            weights = [1 / len(children)] * len(children)
        # Update one dict for all children; writing a fresh single-entry dict
        # per child would keep only the last one.
        strategy_values = parent_node.get_value("strategy") or {}
        strategy_values.update({child.name: weight for child, weight in zip(children, weights)})
        parent_node.set_value("strategy", strategy_values)

    def next_round(self):
        """Reset the per-street betting fields of ``self.game_state``."""
        self.game_state['last_bet'] = 0
        self.game_state['raised'] = False
        self.game_state['reraised'] = False

    def simulate_round(self, game_round, game_state, parent_node, initial=False):
        """Play one street with a random legal hero action and return its utility.

        ``parent_node`` defaults to ``self.tree`` when None. With ``initial``
        set (or no state yet) ``self.game_state`` is re-seeded from a copy of
        ``game_state``. The line of play so far is printed before returning.
        """
        if parent_node is None:
            parent_node = self.tree
        if initial or not self.game_state:
            self.game_state = game_state.copy()
        self.next_round()
        # Every street currently samples one random action.
        # TODO: iterate over all legal actions on later streets.
        action = random.choice(self.get_actions())
        utility = self.action_node(game_round, game_state, action, parent_node)
        print([actions_to_read[x] for x in self.game_state['line']])
        return utility

    # NOTE: the legacy per-street drivers flop()/turn()/river(), which asked
    # self.AI.choose_action(state, legal_actions) for a move, were superseded
    # by simulate_round().

    @profile
    def take_action(self, action):
        """Execute the hero ``action`` id and append it to the state's ``line``.

        Raises KeyError for an unknown action id.
        """
        fraction = self._BET_FRACTIONS.get(action)
        if fraction is not None:
            self.action_bet(fraction)
        else:
            handlers = {FOLD: self.action_fold,
                        RAISE: self.action_raise,
                        CALL: self.action_call,
                        CHECK: self.action_check}
            handlers[action]()
        self.game_state['line'].append(action)

    def action_fold(self):
        """Mark the hero as folded."""
        self.game_state['folded'] = True
        self.actions_taken.append("fold")

    def action_check(self):
        """Check: log the action and clear ``last_bet``."""
        # TODO: separate object for villain
        # TODO: Set betting lead
        self.actions_taken.append("check")
        self.game_state['last_bet'] = 0

    def action_call(self):
        """Call ``last_bet`` and give up the betting lead."""
        self.update_chips(self.game_state['last_bet'])
        self.actions_taken.append("call")
        self.game_state['betting_lead'] = False

    def action_bet(self, size):
        """Bet ``size`` (a fraction) of the current pot and take the betting lead."""
        self.update_chips(size * self.game_state['pot_size'])
        self.actions_taken.append("bet " + str(size))
        self.game_state['betting_lead'] = True

    def action_raise(self):
        """Raise to three times ``last_bet``; a second raise sets ``reraised``."""
        self.update_chips(3 * self.game_state['last_bet'])
        if self.game_state['raised']:
            self.game_state['reraised'] = True
        else:
            self.game_state['raised'] = True
        self.game_state['betting_lead'] = True

    @profile
    def update_chips(self, bet_amount):
        """Move ``bet_amount`` (capped at the hero's stack) from stack to pot."""
        state = self.game_state
        if bet_amount >= state['hero_stack']:
            # all in >= needed?
            bet_amount = state['hero_stack']
            # TODO: Go to showdown directly
        state['hero_stack'] -= bet_amount
        state['pot_size'] += bet_amount
        state['last_bet'] = bet_amount

    def _net_if_won(self):
        """Hero's net result when the hero takes the pot."""
        return self.game_state['pot_size'] - (self.STARTING_STACK - self.game_state['hero_stack'])

    def _net_if_lost(self):
        """Hero's net result when the hero loses everything invested."""
        return self.game_state['hero_stack'] - self.STARTING_STACK

    @profile
    def showdown(self, game_round):
        """Return the hero's net result for a hand that ends on ``game_round``.

        A fold counts as a loss. When board cards are still to come the hand
        is provisionally scored as a hero win. Otherwise both hands are
        evaluated with ``lookup_hand``; a tie returns 0.
        """
        cards_to_be_dealt = 3 - game_round  # 3 on the flop already
        if self.game_state['folded'] or cards_to_be_dealt == -1:
            return self._net_if_lost()
        if cards_to_be_dealt > 0:
            # TODO: Enumerate all run-outs instead of crediting the hero.
            return self._net_if_won()
        board_p = lookup_hand(self.board)
        hero_score = lookup_hand(self.hand_h, p=board_p)
        villain_score = lookup_hand(self.hand_v, p=board_p)
        if hero_score > villain_score or self.game_state['villain_folded']:
            return self._net_if_won()
        if hero_score < villain_score:
            return self._net_if_lost()
        return 0
def readable(hand):
    """Return the ``CARDS_REVERSE`` entry for every card in ``hand``, in order.

    Raises whatever ``CARDS_REVERSE[...]`` raises for an unknown card.
    """
    return [CARDS_REVERSE[card] for card in hand]
def iterate_children(node):
    """Print the subtree below ``node`` depth-first, with the best child per level.

    Each child is printed as ``<indent> <last name segment> <value>``, followed
    by its own subtree. After all children, the name of the child with the
    highest ``value`` is printed (the first one wins ties). Leaves print
    nothing.
    """
    if node.is_leaf:
        return
    indent = node.depth * " "
    for child in node.children:
        print(indent, child.name.split(":")[-1], child.value)
        iterate_children(child)
    # max() returns the first maximal child, matching a strict ">" scan.
    optimal_node = max(node.children, key=lambda child: child.value)
    print("Optimal node", optimal_node.name)
def optimal_action(node):
    """Print the highest-value path from ``node`` down to a leaf.

    At every level the child with the largest ``value`` is chosen and printed
    as ``<last name segment> <value>``. Ties go to the first child, so if bet
    sizes are ordered min to max this has the side effect of picking the
    smallest size that gives the same reward. A leaf prints nothing.
    """
    while not node.is_leaf:
        node = max(node.children, key=lambda child: child.value)
        print(node.name.split(":")[-1], node.value)
# Number of hands run() simulates; also the numerator of the
# cycles-per-second figure printed at the end of the script.
N = 10000
@profile
def run():
    """Simulate ``N`` hands from a fixed spot and dump the resulting action tree.

    The hero holds AhQd and the villain 2s2d on an Ad-Ac-As flop; the remaining
    board cards are re-dealt for every hand. All hands share one tree, which is
    rendered to ``tree.png`` and then printed node by node.
    """
    hero = [CARDS['Ah'], CARDS['Qd']]
    villain = [CARDS['2s'], CARDS['2d']]
    board_test = [CARDS['Ad'], CARDS['Ac'], CARDS['As']]
    root = Node('Root')
    initial_game_state = {'pot_size': 2, 'betting_lead': True, 'hero_stack': 97, 'last_bet': 0, 'raised': False,
                          'reraised': False, 'folded': False, 'villain_folded': False, 'line': []}
    game = PokerGame(None, hero, villain, None, board=board_test)
    for _ in range(N):
        game.set_board(board_test)
        # Deep copy so every hand starts with its own empty 'line' list.
        game_state = copy.deepcopy(initial_game_state)
        game.simulate_round(FLOP, game_state, root, initial=True)
    RenderTreeGraph(root).to_picture("tree.png")
    for pre, fill, node in RenderTree(root):
        print("%s%s (%s) (%s) (%s) (%s)" % (
            pre, str(node.name).split(":")[-1], node.get_value(), node.get_value("regret"),
            node.get_value("regretSum"), node.get_value("strategyNOT")))
# Script body: runs the whole simulation at import time and reports
# wall-clock throughput (N hands divided by elapsed seconds).
t0 = time.time()
run()
t = time.time() - t0
print(N / t, "cycles per second")
print(t, "seconds total")