Skip to content

Commit

Permalink
Integrate preflop CFR into AI
Browse files Browse the repository at this point in the history
  • Loading branch information
Gongsta committed Jun 21, 2024
1 parent 174106e commit 9f7a532
Show file tree
Hide file tree
Showing 9 changed files with 309 additions and 850 deletions.
227 changes: 134 additions & 93 deletions slumbot/slumbot_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
The / represents the transition between each game stage, i.e. pre-flop/flop/turn/river.
Example of a game sequence that reaches showdown: cb300c/kk/kb300c/kb1200c
"""

import numpy as np
import requests
import sys
import argparse
Expand All @@ -55,6 +55,11 @@

sys.path.append("../src")

from aiplayer import EquityAIPlayer, CFRAIPlayer
from preflop_holdem import PreflopHoldemInfoSet, PreflopHoldemHistory
from postflop_holdem import PostflopHoldemInfoSet, PostflopHoldemHistory


from abstraction import (
calculate_equity,
calculate_equity_distribution,
Expand All @@ -69,23 +74,26 @@
- (Strategy 1) Only play the hands where you have over 50% chance of winning using my `calculate_equity` function
- (Strategy 2) Use the CFR algorithm
"""
STRATEGY = 0 # SET THE STRATEGY HERE
STRATEGY = 4 # SET THE STRATEGY HERE

player = None
if STRATEGY == 0:
USERNAME = "all-in"
PASSWORD = "all-in"
if STRATEGY == 1:
USERNAME = "calling_station"
PASSWORD = "calling_station"
elif STRATEGY == 1:
elif STRATEGY == 2:
USERNAME = "dumb_equity"
PASSWORD = "dumb_equity"
elif STRATEGY == 2:
elif STRATEGY == 3:
USERNAME = "smart_equity"
PASSWORD = "smart_equity"
elif STRATEGY == 3:
player = EquityAIPlayer(20000)
elif STRATEGY == 4:
USERNAME = "cfr_abstraction"
PASSWORD = "cfr_abstraction"
elif STRATEGY == 4:
USERNAME = "preflop"

player = CFRAIPlayer(20000)
# if os.path.exists(f"../data/slumbot/{USERNAME}.joblib"): # Load previous history if it exists
# history = joblib.load(f"../data/slumbot/{USERNAME}.joblib")

Expand Down Expand Up @@ -317,88 +325,111 @@ def Act(token, action):
return r


from aiplayer import getAction
import numpy as np


def ComputeStrategy(hole_cards, board, action, strategy=STRATEGY):
a = ParseAction(action)

if strategy == 0: # always check or call
player_balance = 20000 - (a["total_last_bet_to"] - a["street_last_bet_to"])
total_pot_balance = 2 * a["total_last_bet_to"]
stage_pot_balance = total_pot_balance
highest_current_bet = a["street_last_bet_to"]
BIG_BLIND = 100
card_str = hole_cards
community_cards = board
isDealer = a["pos"] == 1
checkAllowed = a["last_bettor"] == -1

history = convert_action_to_history(hole_cards, board, action, isDealer)
print(history)

if strategy == 0: # all-in
incr = "b20000"
elif strategy == 1: # always check or call
if a["last_bettor"] == -1: # no one has bet yet
incr = "k"
else: # opponent has bet, so simply call
incr = "c"
elif strategy == 1:
equity = calculate_equity(hole_cards, board, n=5000)
print(f"equity calculated: {equity} for hole cards: {hole_cards} and board: {board}")
if a["last_bettor"] == -1:
if equity >= 0.5:
incr = "b1000"
else:
incr = "k"
else:
if equity >= 0.5:
incr = "c"
else:
incr = "f"
elif strategy == 2:
card_str = hole_cards
community_cards = board
# if observed_env.game_stage == 2:
equity = calculate_equity(card_str, community_cards)

# fold, check / call, raise
np_strategy = np.abs(np.array([1.0 - (equity + equity / 2.0), equity, equity / 2.0]))
np_strategy = np_strategy / np.sum(np_strategy) # normalize
remaining = 20000 - (a["total_last_bet_to"] - a["street_last_bet_to"])

if a["street_last_bet_to"] == 0: # no bet placed
if a["pos"] == 1: # If you are the dealer, raise more of the time
strategy = {
"k": np_strategy[0],
f"b{min(100, remaining)}": np_strategy[2],
f"b{min(2 * a['total_last_bet_to'], remaining)}": np_strategy[1], # pot-size
}
equity = calculate_equity(hole_cards, board, n=5000)
if a["last_bettor"] == -1:
if equity >= 0.5:
incr = "b1000"
else:
strategy = {
"k": equity,
f"b{min(2 * a['total_last_bet_to'], remaining)}": 1 - equity,
}

else: # if there is a bet already
# TODO: calculate proportional to bet size
# normalize the strategy
if a["last_bettor"] == -1: # You can check
strategy = {
"k": np_strategy[0],
f"b{min(int(1.5 * a['street_last_bet_to']), remaining)}": np_strategy[1],
f"b{min(2 *a['street_last_bet_to'], remaining)}": np_strategy[2],
}
incr = "k"
else:
if equity >= 0.5:
incr = "c"
else:
if remaining == a["street_last_bet_to"]:
strategy = {
"f": np_strategy[0],
"c": np_strategy[1] + np_strategy[2],
}
else:
strategy = {
"f": np_strategy[0],
"c": np_strategy[1],
f"b{min(2 * a['street_last_bet_to'], remaining)}": np_strategy[2],
}

print(action, a)
print(card_str, community_cards)
print("equity", equity)
print("AI strategy ", strategy)
incr = getAction(strategy)
print("decision", incr)
print("")
incr = "f"
elif strategy == 3:
incr = player.get_action(
card_str,
community_cards,
total_pot_balance,
highest_current_bet,
BIG_BLIND,
player_balance,
isDealer,
checkAllowed,
)

elif strategy == 4:
# ck/kk/kb100c/kb100b400
# convert this to a infoset, use the environment
incr = player.get_action(
history,
card_str,
community_cards,
highest_current_bet,
stage_pot_balance,
total_pot_balance,
player_balance,
BIG_BLIND,
isDealer,
checkAllowed,
)

return incr


def convert_action_to_history(hole_cards, board, action, isDealer):
DISCRETE_ACTIONS = ["k", "c", "/"]
stage_i = 0
if isDealer:
history = ["2s2h", "".join(hole_cards)]
else:
history = ["".join(hole_cards), "2s2h"]

i = 0
while i < len(action):
a = action[i]

if a in DISCRETE_ACTIONS:
if a == "/":
history += ["/"]
stage_i += 1
if stage_i == 1:
history += ["".join(board[:3])]
elif stage_i == 2:
history += ["".join(board[3])]
else:
history += ["".join(board[4])]

else:
history += [a]
i += 1

else:
j = i
while j < len(action):
if action[j] in DISCRETE_ACTIONS or (j != i and action[j] == "b"):
break
j += 1

history += [action[i:j]]
i = j
return history


def PlayHand(token, debug=False):
r = NewHand(token)
# We may get a new token back from /api/new_hand
Expand All @@ -419,6 +450,7 @@ def PlayHand(token, debug=False):
hole_cards = r.get("hole_cards")
board = r.get("board")
winnings = r.get("winnings")
baseline_winnings = r.get("baseline_winnings")
if debug:
print("Action: %s" % action)
if client_pos:
Expand All @@ -427,14 +459,16 @@ def PlayHand(token, debug=False):
print("Board: %s" % repr(board))
if winnings is not None:
print("Hand winnings: %i" % winnings)
return (token, winnings)
return (token, winnings, baseline_winnings)
# Need to check or call
a = ParseAction(
action
) # Ex: {'st': 0, 'pos': 1, 'street_last_bet_to': 100, 'total_last_bet_to': 100, 'last_bet_size': 50, 'last_bettor': 0}
if "error" in a:
print("Error parsing action %s: %s" % (action, a["error"]))
sys.exit(-1)
print(a)
return (token, 0, 0)
sys.exit(-1) # don't exit, just don't consider this round

incr = ComputeStrategy(hole_cards, board, action)
if debug:
Expand Down Expand Up @@ -492,24 +526,31 @@ def main():
# import urllib3
# urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
num_hands = 10000
winnings = 0
winnings_history = []
baseline_winnings_history = []
# winnings_history = joblib.load(f"../results/slumbot_strategy_{STRATEGY}_{USERNAME}_raw.joblib")
# baseline_winnings_history = joblib.load(
# f"../results/slumbot_strategy_{STRATEGY}_{USERNAME}_baseline.joblib"
# )
winnings = winnings_history[-1] if len(winnings_history) > 0 else 0
baseline_winnings = baseline_winnings_history[-1] if len(baseline_winnings_history) > 0 else 0

for h in tqdm(range(num_hands)):
try:
(token, hand_winnings) = PlayHand(token)
winnings += hand_winnings
winnings_history.append(winnings)
if h % 1000 == 0:
joblib.dump(
winnings_history, f"../results/slumbot_strategy_{STRATEGY}_{USERNAME}.joblib"
)
# print(history)
except Exception as e:
print(e)
num_hands += 1

joblib.dump(winnings_history, f"../results/slumbot_strategy_{STRATEGY}_{USERNAME}.joblib")
(token, hand_winnings, baseline_hand_winnings) = PlayHand(token)
winnings += hand_winnings
baseline_winnings += baseline_hand_winnings
winnings_history.append(winnings)
baseline_winnings_history.append(baseline_winnings)
if (h + 1) % 1000 == 0:
joblib.dump(
winnings_history, f"../results/slumbot_strategy_{STRATEGY}_{USERNAME}_raw.joblib"
)
joblib.dump(
baseline_winnings_history,
f"../results/slumbot_strategy_{STRATEGY}_{USERNAME}_baseline.joblib",
)
# print(history)

print("Total winnings: %i" % winnings)


Expand Down
1 change: 0 additions & 1 deletion src/abstraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,6 @@ def generate_dataset(num_samples=50000, batch=0, save=True):
np.save(f"dataset/player_hands_{batch}.npy", player_hands)
np.save(f"dataset/opponent_hands_{batch}.npy", opponent_hands)
np.save(f"dataset/winners_{batch}.npy", winners)
print("continuing to save datasets")

np.save(f"dataset/player_flop_clusters_{batch}.npy", player_flop_clusters)
np.save(f"dataset/player_turn_clusters_{batch}.npy", player_turn_clusters)
Expand Down
Loading

0 comments on commit 9f7a532

Please sign in to comment.