From 8ae25717bb2bf20ffb425e10a230a3b863ef34e1 Mon Sep 17 00:00:00 2001 From: Steven Date: Sat, 15 Jun 2024 02:46:21 -0700 Subject: [PATCH] update CFR code --- game/poker_main.py | 46 ++- src/abstraction.py | 808 +++++++++++++++++++++--------------------- src/base.py | 18 +- src/environment.py | 177 ++++++--- src/fast_evaluator.py | 26 ++ src/holdem.py | 665 +++++++++++++++++++++------------- src/kuhn.py | 4 +- 7 files changed, 1011 insertions(+), 733 deletions(-) diff --git a/game/poker_main.py b/game/poker_main.py index c238110..bc9f8fb 100644 --- a/game/poker_main.py +++ b/game/poker_main.py @@ -1,15 +1,11 @@ -""" -Nobody cares about no-limit hold'em -""" - import sys sys.path.append("../src") + from environment import * from helper import * import pygame import argparse -import os import time import joblib @@ -31,7 +27,6 @@ RED = (255, 0, 0) FPS = 60 -# POKER_BACKGROUND = pygame.transform.scale(pygame.image.load("assets/poker-table(OLD).jpg"), (WIDTH, HEIGHT)) POKER_BACKGROUND = pygame.transform.scale( pygame.image.load("assets/poker-table.png"), (WIDTH, HEIGHT) ) @@ -75,11 +70,10 @@ CARD_BACK = pygame.transform.scale(pygame.image.load("../assets/back.png"), (263 / 3, 376 / 3)) -POT_FONT = pygame.font.SysFont("Roboto", 30) -BET_BUTTON_FONT = pygame.font.SysFont("Roboto", 24) -BET_FONT = pygame.font.SysFont("Roboto", 26) -PLAYERS_FONT = pygame.font.SysFont("Roboto", 24) - +POT_FONT = pygame.font.SysFont("Roboto", 30, bold=True) +BET_BUTTON_FONT = pygame.font.SysFont("Roboto", 24, bold=True) +BET_FONT = pygame.font.SysFont("Roboto", 26, bold=True) +PLAYERS_FONT = pygame.font.SysFont("Roboto", 24, bold=True) # To rescale: pygame.transform.scale(card, (width, height)) # pygame.transform.rotate(card, degrees) @@ -108,14 +102,12 @@ cursor_counter = 0 - def load_card_image(card: Card): # 263 × 376 return pygame.transform.scale( pygame.image.load("../assets/" + str(card) + ".png"), (263 / 3, 376 / 3) ) - def display_total_pot_balance(env: PokerEnvironment): pot_information = POT_FONT.render("Total Pot: $" + str(env.total_pot_balance), 1, WHITE) WIN.blit(pot_information, (900, HEIGHT / 2 - 30)) @@ -246,18 +238,18 @@ def draw_window(env: PokerEnvironment, god_mode=False, user_input=False): WIN.blit(warning_text_rendered, (WIDTH - 250, HEIGHT - 120)) if user_input: - if env.position_in_play == 0: + if env.position_in_play == 0 or env.play_as_AI: # AAfilledRoundedRect(WIN, RED, pygame.Rect(392,400, 120,50), radius=0.4) AAfilledRoundedRect(WIN, RED, check_rect, radius=0.4) AAfilledRoundedRect(WIN, RED, custom_rect, radius=0.4) AAfilledRoundedRect(WIN, WHITE, input_box, radius=0.4) - if "f" in env.history.actions(): + if "f" in env.infoset.actions(): AAfilledRoundedRect(WIN, RED, fold_rect, radius=0.4) fold_bet = BET_BUTTON_FONT.render("Fold", 1, WHITE) WIN.blit(fold_bet, (fold_rect.x + 15, fold_rect.y + 7)) - if "k" in env.history.actions(): + if "k" in env.infoset.actions(): check_bet = BET_BUTTON_FONT.render("Check", 1, WHITE) WIN.blit(check_bet, (check_rect.x + 15, check_rect.y + 7)) else: # TODO: Min bet size is not 0 when you are the small blind, so it should be call, not check right. @@ -318,16 +310,20 @@ def main(): game = 0 game_i = 0 - env: PokerEnvironment = PokerEnvironment() - if user_input or replay: - env.add_player() # You / replay - else: - env.add_AI_player() # Simulation player - if replay: - env.add_player() # Player since we want everything to be entered manually - else: - env.add_AI_player() # Opponent + env: PokerEnvironment = PokerEnvironment() + # if user_input or replay: + # env.add_player() # You / replay + # else: + # env.add_AI_player() # Simulation player + + # if replay: + # env.add_player() # Player since we want everything to be entered manually + # else: + # env.add_AI_player() # Opponent + # play as the AI + env.add_AI_player() + env.add_player() # play as the opponent too clock = pygame.time.Clock() run = True diff --git a/src/abstraction.py b/src/abstraction.py index 14290f6..1889db3 100644 --- a/src/abstraction.py +++ b/src/abstraction.py @@ -1,5 +1,5 @@ """ -Python file that takes care of betting and card abstractions for Poker. +Python file that takes care of betting and card abstractions for Poker. Inspired from Noam Brown's paper: https://arxiv.org/pdf/1805.08195.pdf @@ -11,6 +11,7 @@ *Note on Card Abstraction: While I wrote my own Card and Deck object implements, it is simply too slow. Rather, working with string representation is much faster and memory-efficient. """ + from copy import deepcopy from typing import List import fast_evaluator @@ -23,14 +24,15 @@ import glob import joblib from joblib import Parallel, delayed + """ BET ABSTRACTION, hmm this logic directly encoded in `holdem.py` """ -# For action abstraction, I have decided to simplify the actions to fold (f), check (k), call (c), small-bet (0.5x pot), medium-bet (1x pot), large-bet (2x pot), and all-in. +# For action abstraction, I have decided to simplify the actions to fold (f), check (k), call (c), small-bet (0.5x pot), medium-bet (1x pot), large-bet (2x pot), and all-in. # def bet_abstraction(bet_size): # """Bet size is relative to pot. - + # """ # if bet_size == 0: # return 'c' @@ -46,17 +48,17 @@ # def abstraction(): # # TODO: Investigate the effect of action abstraction on exploitability. # """ - + # Daniel Negreanu: How Much Should You Raise? https://www.youtube.com/watch?v=WqRUyYQcc5U # Bet sizing: https://www.consciouspoker.com/blog/poker-bet-sizing-strategy/#:~:text=We%20recommend%20using%201.5x,t%20deduce%20your%20likely%20holdings. # Also see slumbot notes: https://nanopdf.com/queue/slumbot-nl-solving-large-games-with-counterfactual_pdf?queue_id=-1&x=1670505293&z=OTkuMjA5LjUyLjEzOA== - + # TODO: Check the case on preflop when the small blind simply calls, the BB should have the option to min-raise by amounts. # For initial bets, these are fractions of the total pot size (money at the center of the table): # for bets: -# - b0.25 = bet 25% of the pot +# - b0.25 = bet 25% of the pot # - b0.5 = bet 50% of the pot # - b0.75 = bet 75% of the pot # - b1 = bet 100% of the pot @@ -67,28 +69,28 @@ # After a bet has happened, we can only raise by a certain amount. # - b0.5 -# - b1 +# - b1 # - b2 = 2x pot size # - b4 = 4x pot size # - b8 = 8x pot size # - all-in = all-in, opponent is forced to either call or fold - + # 2-bet is the last time we can raise again # - b1 # - b2 = 2x pot size # - all-in - + # 3-bet # - b1 - + # 4-bet # - all-in # """ - + # # Note: all-in case is just the maximum bet -# actions = ['k', 'b0.25','b0.5', 'b0.75', 'b1', 'b2', 'b4', 'b8', 'all-in', 'c', 'f'] - +# actions = ['k', 'b0.25','b0.5', 'b0.75', 'b1', 'b2', 'b4', 'b8', 'all-in', 'c', 'f'] + # current_game_stage_history, stage = self.get_current_game_stage_history() # # Pre-flop @@ -109,9 +111,9 @@ # elif len(current_game_stage_history) == 2: # 3-bet # # You cannot check at this point # actions = ['b1', 'all-in', 'c', 'f'] - + # elif len(current_game_stage_history) == 3: # 4-bet -# actions = ['all-in', 'c', 'f'] +# actions = ['all-in', 'c', 'f'] # else: # flop, turn, river # if len(current_game_stage_history == 0): @@ -152,204 +154,202 @@ import torch from tqdm import tqdm + # Preflop Abstraction with 169 buckets (lossless abstraction) -def get_preflop_cluster_id(two_cards_string): # Lossless abstraction for pre-flop, 169 clusters - # cards input ex: Ak2h or ['Ak', '2h'] - """ - For the Pre-flop, we can make a lossless abstraction with exactly 169 buckets. The idea here is that what specific suits - our private cards are doesn't matter. The only thing that matters is whether both cards are suited or not. - - This is how the number 169 is calculated: - - For cards that are not pocket pairs, we have (13 choose 2) = 13 * 12 / 2 = 78 buckets (since order doesn't matter) - - These cards that are not pocket pairs can also be suited, so we must differentiate them. We have 78 * 2 = 156 buckets - - Finally, for cards that are pocket pairs, we have 13 extra buckets (Pair of Aces, Pair of 2, ... Pair Kings). 156 + 13 = 169 buckets - - Note that a pair cannot be suited, so we don't need to multiply by 2. - - Cluster ids: - 1-13 -> pockets - 14-91 -> Unsuited cluster pairs that are not pockets - 92-169 -> Suited cluster pairs that are not pockets - - """ - if type(two_cards_string) == list: - two_cards_string = "".join(two_cards_string) - - assert(len(two_cards_string) == 4) - - KEY = {"A": 1, "2": 2, "3": 3, "4":4, "5":5, "6":6, # Supports both "T" and "10" as 10 - "7": 7, "8": 8, "9": 9, "T": 10, "10":10, "J": 11, "Q": 12, "K":13} - - cluster_id = 0 - - def hash_(a, b): - """ - A2/2A -> 1 - A3/3A -> 2 - A4/4A -> 3 - ... - KQ/QK -> 78 - - returns values ranging from 1 to 78 - """ - assert(a != b) - assert(len(a) == 1 and len(b) == 1) - first = min(KEY[a], KEY[b]) - second = max(KEY[a], KEY[b]) - ans = first * (first - 1) / 2 + (second - 1) - return int(ans) - - if two_cards_string[0] == two_cards_string[2]: # pockets - cluster_id = KEY[two_cards_string[0]] - elif two_cards_string[1] != two_cards_string[3]: # unsuited that are not pockets - cluster_id = 13 + hash_(two_cards_string[0], two_cards_string[2]) - else: # suited that are not pockets - cluster_id = 91 + hash_(two_cards_string[0], two_cards_string[2]) - - assert(cluster_id >= 1 and cluster_id <= 169) - - return cluster_id +def get_preflop_cluster_id(two_cards_string): # Lossless abstraction for pre-flop, 169 clusters + # cards input ex: Ak2h or ['Ak', '2h'] + """ + For the Pre-flop, we can make a lossless abstraction with exactly 169 buckets. The idea here is that what specific suits + our private cards are doesn't matter. The only thing that matters is whether both cards are suited or not. + + This is how the number 169 is calculated: + - For cards that are not pocket pairs, we have (13 choose 2) = 13 * 12 / 2 = 78 buckets (since order doesn't matter) + - These cards that are not pocket pairs can also be suited, so we must differentiate them. We have 78 * 2 = 156 buckets + - Finally, for cards that are pocket pairs, we have 13 extra buckets (Pair of Aces, Pair of 2, ... Pair Kings). 156 + 13 = 169 buckets + + Note that a pair cannot be suited, so we don't need to multiply by 2. + + Cluster ids: + 1-13 -> pockets + 14-91 -> Unsuited cluster pairs that are not pockets + 92-169 -> Suited cluster pairs that are not pockets + + """ + if type(two_cards_string) == list: + two_cards_string = "".join(two_cards_string) + + assert len(two_cards_string) == 4 + + KEY = { + "A": 1, + "2": 2, + "3": 3, + "4": 4, + "5": 5, + "6": 6, # Supports both "T" and "10" as 10 + "7": 7, + "8": 8, + "9": 9, + "T": 10, + "10": 10, + "J": 11, + "Q": 12, + "K": 13, + } + + cluster_id = 0 + + def hash_(a, b): + """ + A2/2A -> 1 + A3/3A -> 2 + A4/4A -> 3 + ... + KQ/QK -> 78 + + returns values ranging from 1 to 78 + """ + assert a != b + assert len(a) == 1 and len(b) == 1 + first = min(KEY[a], KEY[b]) + second = max(KEY[a], KEY[b]) + ans = first * (first - 1) / 2 + (second - 1) + return int(ans) + + if two_cards_string[0] == two_cards_string[2]: # pockets + cluster_id = KEY[two_cards_string[0]] + elif two_cards_string[1] != two_cards_string[3]: # unsuited that are not pockets + cluster_id = 13 + hash_(two_cards_string[0], two_cards_string[2]) + else: # suited that are not pockets + cluster_id = 91 + hash_(two_cards_string[0], two_cards_string[2]) + + assert cluster_id >= 1 and cluster_id <= 169 + + return cluster_id + # Post-Flop Abstraction using Equity Distributions def create_abstraction_folders(): - if not os.path.exists('../data'): - for split in ['clusters', 'raw']: - for stage in ['flop', 'turn', 'river']: - os.makedirs(f'../data/{split}/{stage}') + if not os.path.exists("../data"): + for split in ["clusters", "raw"]: + for stage in ["flop", "turn", "river"]: + os.makedirs(f"../data/{split}/{stage}") def calculate_equity(player_cards: List[str], community_cards=[], n=1000, timer=False): - if timer: - start_time = time.time() - wins = 0 - deck = fast_evaluator.Deck(excluded_cards=player_cards + community_cards) - - for _ in range(n): - random.shuffle(deck) - opponent_cards = deck[:2] # To avoid creating redundant copies - player_score = evaluate_cards(*(player_cards + community_cards + deck[2:2+(5 - len(community_cards))])) - opponent_score = evaluate_cards(*(opponent_cards + community_cards + deck[2:2+(5 - len(community_cards))])) - if player_score < opponent_score: - wins += 1 - elif player_score == opponent_score: - wins += 0.5 - - if timer: - print("Time it takes to call function: {}s".format(time.time() - start_time)) - - return wins / n - - -def calculate_face_up_equity(player_cards, opponent_cards, community_cards, n=1000): - """Same as calculate_equity, except that you know your opponents cards as well, so total probability should sum to one. - """ - assert(len(player_cards) == 2 and len(opponent_cards) == 2) - player_wins = 0 - opponent_wins = 0 - - deck = fast_evaluator.Deck(excluded_cards=player_cards + opponent_cards + community_cards) - for _ in range(n): - random.shuffle(deck) - player_score = evaluate_cards(*(player_cards + community_cards + deck[0:(5 - len(community_cards))])) - opponent_score = evaluate_cards(*(opponent_cards + community_cards + deck[0:(5 - len(community_cards))])) - - if player_score < opponent_score: - player_wins += 1 - elif player_score == opponent_score: - player_wins += 0.5 - opponent_wins += 0.5 - else: - opponent_wins += 1 - - - return player_wins / n, opponent_wins / n - -def calculate_equity_distribution(player_cards: List[str], community_cards=[], bins=5, n=200, timer=False, parallel=True): - """ - Return - equity_hist - Histogram as a list of "bins" elements - - n = # of cards to sample from the next round to generate this distribution. - - There is a tradeoff between the execution speed and variance of the values calculated, since - we are using a monte-carlo method to calculate those equites. In the end, I found a bin=5, n=100 - and rollouts using 100 values to be a good approximation. We won't be using this method for - pre-flop, since we can have a lossless abstraction of that method anyways. - - The equity distribution is a better way to represent the strength of a given hand. It represents - how well a given hand performs over various profiles of community cards. We can calculate - the equity distribution of a hand at the following game stages: flop (we are given no community cards), turn (given 3 community cards) and river (given 4 community cards). - - if we want to generate a distribution for the EHS of the turn (so we are given our private cards + 3 community cards), - we draw various turn cards, and calculate the equity using those turn cards. - If we find for a given turn card that its equity is 0.645, and we have 10 bins, we would increment the bin 0.60-0.70 by one. - We repeat this process until we get enough turn card samples. - """ - if timer: - start_time = time.time() - equity_hist = [0 for _ in range(bins)] # histogram of equities, where equity[i] represents the probability of the i-th bin - - assert(len(community_cards) != 1 and len(community_cards) != 2) - - deck = fast_evaluator.Deck(excluded_cards=player_cards + community_cards) - - if parallel: # Computing these equity distributions in parallel is much faster - def sample_equity(): - random.shuffle(deck) - if len(community_cards) == 0: - score = calculate_equity(player_cards, community_cards + deck[:3], n=200) - elif (len(community_cards) < 5): - score = calculate_equity(player_cards, community_cards + deck[:1], n=100) - else: - score = calculate_equity(player_cards, community_cards, n=100) - - # equity_hist[min(int(score * bins), bins-1)] += 1.0 # Score of the closest bucket is incremented by 1 - return min(int(score * bins), bins-1) - - equity_bin_list = Parallel(n_jobs=-1)(delayed(sample_equity)() for _ in range(n)) - for bin_i in equity_bin_list: - equity_hist[bin_i] += 1.0 - - else: - for i in range(n): - random.shuffle(deck) - if len(community_cards) == 0: - score = calculate_equity(player_cards, community_cards + deck[:3], n=200) - else: - score = calculate_equity(player_cards, community_cards + deck[:1], n=100) - - equity_hist[min(int(score * bins), bins-1)] += 1.0 # Score of the closest bucket is incremented by 1 - - # Normalize the equity so that the probability mass function (p.m.f.) of the distribution sums to 1 - for i in range(bins): - equity_hist[i] /= n - - if timer: - print("Time to calculate equity distribution: ", time.time() - start_time) - return equity_hist - + if timer: + start_time = time.time() + wins = 0 + deck = fast_evaluator.Deck(excluded_cards=player_cards + community_cards) + + for _ in range(n): + random.shuffle(deck) + opponent_cards = deck[:2] # To avoid creating redundant copies + player_score = evaluate_cards( + *(player_cards + community_cards + deck[2 : 2 + (5 - len(community_cards))]) + ) + opponent_score = evaluate_cards( + *(opponent_cards + community_cards + deck[2 : 2 + (5 - len(community_cards))]) + ) + if player_score < opponent_score: + wins += 1 + elif player_score == opponent_score: + wins += 0.5 + + if timer: + print("Time it takes to call function: {}s".format(time.time() - start_time)) + + return wins / n + + +def calculate_equity_distribution( + player_cards: List[str], community_cards=[], bins=5, n=200, timer=False, parallel=False +): + """ + Return + equity_hist - Histogram as a list of "bins" elements + + n = # of cards to sample from the next round to generate this distribution. + + There is a tradeoff between the execution speed and variance of the values calculated, since + we are using a monte-carlo method to calculate those equites. In the end, I found a bin=5, n=100 + and rollouts using 100 values to be a good approximation. We won't be using this method for + pre-flop, since we can have a lossless abstraction of that method anyways. + + The equity distribution is a better way to represent the strength of a given hand. It represents + how well a given hand performs over various profiles of community cards. We can calculate + the equity distribution of a hand at the following game stages: flop (we are given no community cards), turn (given 3 community cards) and river (given 4 community cards). + + if we want to generate a distribution for the EHS of the turn (so we are given our private cards + 3 community cards), + we draw various turn cards, and calculate the equity using those turn cards. + If we find for a given turn card that its equity is 0.645, and we have 10 bins, we would increment the bin 0.60-0.70 by one. + We repeat this process until we get enough turn card samples. + """ + if timer: + start_time = time.time() + equity_hist = [ + 0 for _ in range(bins) + ] # histogram of equities, where equity[i] represents the probability of the i-th bin + + assert len(community_cards) != 1 and len(community_cards) != 2 + + deck = fast_evaluator.Deck(excluded_cards=player_cards + community_cards) + + def sample_equity(): + random.shuffle(deck) + if len(community_cards) == 0: + score = calculate_equity(player_cards, community_cards + deck[:3], n=200) + elif len(community_cards) < 5: + score = calculate_equity(player_cards, community_cards + deck[:1], n=100) + else: + score = calculate_equity(player_cards, community_cards, n=100) + + # equity_hist[min(int(score * bins), bins-1)] += 1.0 # Score of the closest bucket is incremented by 1 + return min(int(score * bins), bins - 1) + + if parallel: # Computing these equity distributions in parallel is much faster + equity_bin_list = Parallel(n_jobs=-1)(delayed(sample_equity)() for _ in range(n)) + + else: + equity_bin_list = [sample_equity() for _ in range(n)] + + for bin_i in equity_bin_list: + equity_hist[bin_i] += 1.0 + + # Normalize the equity so that the probability mass function (p.m.f.) of the distribution sums to 1 + for i in range(bins): + equity_hist[i] /= n + + if timer: + print("Time to calculate equity distribution: ", time.time() - start_time) + return equity_hist + + def plot_equity_hist(equity_hist, player_cards=None, community_cards=None): - """Plot the equity histogram. - """ - plt.clf() # Clear Canvas - plt.hist([i/len(equity_hist) for i in range(len(equity_hist))],[i/len(equity_hist) for i in range(len(equity_hist)+1)], weights=equity_hist) - plt.ylabel("Probability Mass") - plt.xlabel("Equity Interval") - if player_cards: - player_string = "\nPlayer Cards: " + str(player_cards) - else: - player_string = "" - - if community_cards: - community_string = "\nCommunity Cards: " + str(community_cards) - else: - community_string = "" - - plt.title("Equity Distribution" + player_string + community_string) - plt.show(block=False) # to plot graphs consecutively quickly with non-blocking behavior - plt.pause(0.2) - - + """Plot the equity histogram.""" + plt.clf() # Clear Canvas + plt.hist( + [i / len(equity_hist) for i in range(len(equity_hist))], + [i / len(equity_hist) for i in range(len(equity_hist) + 1)], + weights=equity_hist, + ) + plt.ylabel("Probability Mass") + plt.xlabel("Equity Interval") + if player_cards: + player_string = "\nPlayer Cards: " + str(player_cards) + else: + player_string = "" + + if community_cards: + community_string = "\nCommunity Cards: " + str(community_cards) + else: + community_string = "" + + plt.title("Equity Distribution" + player_string + community_string) + plt.show(block=False) # to plot graphs consecutively quickly with non-blocking behavior + plt.pause(0.2) + + """ The Algorithm: 1. For river, generate 5000 clusters, each representing a particular equity distribution. @@ -363,209 +363,213 @@ def plot_equity_hist(equity_hist, player_cards=None, community_cards=None): 4. For pre-flop, generate 169 clusters (lossless abstraction). """ -def generate_postflop_equity_distributions(n_samples, bins,stage=None, save=True, timer=True): # Lossful abstraction for flop, turn and river - if timer: - start_time = time.time() - assert(stage is None or stage == 'flop' or stage == 'turn' or stage == 'river') - equity_distributions = [] - hands = [] - - - if stage is None: - generate_postflop_equity_distributions(n_samples, bins, 'flop', save, timer) - generate_postflop_equity_distributions(n_samples, bins, 'turn', save, timer) - generate_postflop_equity_distributions(n_samples, bins, 'river', save, timer) - elif stage == 'flop': - num_community_cards = 3 - elif stage == 'turn': - num_community_cards = 4 - elif stage == 'river': - num_community_cards = 5 - - deck = fast_evaluator.Deck() - for i in tqdm(range(n_samples)): - random.shuffle(deck) - - player_cards = deck[:2] - community_cards = deck[2:2+num_community_cards] - distribution = calculate_equity_distribution(player_cards, community_cards, bins) - equity_distributions.append(distribution) - hands.append(' '.join(player_cards + community_cards)) - - - assert(len(equity_distributions) == len(hands)) - - equity_distributions = np.array(equity_distributions) - print(equity_distributions) - if save: - create_abstraction_folders() - file_id = int(time.time()) # Use the time as the file_id - # TODO: Change to joblib saving for consistency everywhere - with open(f'../data/raw/{stage}/{file_id}_samples={n_samples}_bins={bins}.npy', 'wb') as f: - np.save(f, equity_distributions) - joblib.dump(hands, f'../data/raw/{stage}/{file_id}_samples={n_samples}_bins={bins}') # Store the list of hands, so you can associate a particular distribution with a particular hand - - + +def generate_postflop_equity_distributions( + n_samples, bins, stage=None, save=True, timer=True +): # Lossful abstraction for flop, turn and river + if timer: + start_time = time.time() + assert stage is None or stage == "flop" or stage == "turn" or stage == "river" + equity_distributions = [] + hands = [] + + if stage is None: + generate_postflop_equity_distributions(n_samples, bins, "flop", save, timer) + generate_postflop_equity_distributions(n_samples, bins, "turn", save, timer) + generate_postflop_equity_distributions(n_samples, bins, "river", save, timer) + elif stage == "flop": + num_community_cards = 3 + elif stage == "turn": + num_community_cards = 4 + elif stage == "river": + num_community_cards = 5 + + deck = fast_evaluator.Deck() + for i in tqdm(range(n_samples)): + random.shuffle(deck) + + player_cards = deck[:2] + community_cards = deck[2 : 2 + num_community_cards] + distribution = calculate_equity_distribution(player_cards, community_cards, bins) + equity_distributions.append(distribution) + hands.append(" ".join(player_cards + community_cards)) + + assert len(equity_distributions) == len(hands) + + equity_distributions = np.array(equity_distributions) + print(equity_distributions) + if save: + create_abstraction_folders() + file_id = int(time.time()) # Use the time as the file_id + # TODO: Change to joblib saving for consistency everywhere + with open(f"../data/raw/{stage}/{file_id}_samples={n_samples}_bins={bins}.npy", "wb") as f: + np.save(f, equity_distributions) + joblib.dump( + hands, f"../data/raw/{stage}/{file_id}_samples={n_samples}_bins={bins}" + ) # Store the list of hands, so you can associate a particular distribution with a particular hand + def visualize_clustering(): - """Visualization higher dimensional data is super interesting. - - See `notebooks/abstraction_visualization.ipynb` - """ - return - - -def get_filenames(folder, extension='.npy'): - filenames = [] - - for path in glob.glob(os.path.join(folder, '*' + extension)): - # Extract the filename - filename = os.path.split(path)[-1] - filenames.append(filename) - - return filenames + """Visualization higher dimensional data is super interesting. + + See `notebooks/abstraction_visualization.ipynb` + """ + return + + +def get_filenames(folder, extension=".npy"): + filenames = [] + + for path in glob.glob(os.path.join(folder, "*" + extension)): + # Extract the filename + filename = os.path.split(path)[-1] + filenames.append(filename) + + return filenames + def predict_cluster(kmeans_classifier, cards, n=200): - assert(len(cards) % 2 == 0) - cards_list = [cards[i:i+2] for i in range(0, len(cards), 2)] - equity_distributions = calculate_equity_distribution(cards_list[0:2], cards_list[2:], n=n) - - y = kmeans_classifier.predict([equity_distributions]) - assert(len(y) == 1) - return y[0] + """cards is a list of cards""" + print(cards) + equity_distribution = calculate_equity_distribution(cards[:2], cards[2:], n=n) + equity = calculate_equity(cards[:2], cards[2:], n=1000) + print(equity_distribution) + print( + "averaged historgram: ", + 0.1 * equity_distribution[0] + + 0.3 * equity_distribution[1] + + 0.5 * equity_distribution[2] + + 0.7 * equity_distribution[3] + + 0.9 * equity_distribution[4], + ) + print(equity) + # y = kmeans_classifier.predict([equity_distribution]) + # assert len(y) == 1 + # return y[0] + + +def predict_cluster_fast(cards, n=1000, total_clusters=10): + equity = calculate_equity(cards[:2], cards[2:], n=1000) + cluster = min(total_clusters - 1, int(equity * total_clusters)) + return cluster + + +def batch_predict_clusters(kmeans_classifier, cards_list, n=200): + cards_list = np.array(cards_list) + # player_cards = cards_list + # distribution = calculate_equity_distribution(player_cards, community_cards, bins) + # equity_distributions.append(distribution) + # hands.append(" ".join(player_cards + community_cards)) + # return 1 + def load_kmeans_classifiers(): - raw_dataset_filenames = sorted(get_filenames(f'../data/clusters/flop')) - filename = raw_dataset_filenames[-1] # Take the most recently generated dataset - - centroids = joblib.load(f'../data/clusters/flop/{filename}') - kmeans_flop = KMeans(100) - kmeans_flop.cluster_centers_ = centroids - kmeans_flop._n_threads = -1 - - raw_dataset_filenames = sorted(get_filenames(f'../data/clusters/turn')) - filename = raw_dataset_filenames[-1] # Take the most recently generated dataset - centroids = joblib.load(f'../data/clusters/turn/{filename}') - kmeans_turn = KMeans(100) - kmeans_turn.cluster_centers_ = centroids - kmeans_turn._n_threads = -1 - - raw_dataset_filenames = sorted(get_filenames(f'../data/clusters/river')) - filename = raw_dataset_filenames[-1] # Take the most recently generated dataset - centroids = joblib.load(f'../data/clusters/river/{filename}') - kmeans_river = KMeans(100) - kmeans_river.cluster_centers_ = centroids - kmeans_river._n_threads = -1 - - return kmeans_flop, kmeans_turn, kmeans_river - - -def get_flop_cluster_id(kmeans_flop, cards): - """ - kmeans_flop: KMeans classifier - cards: string of cards in the format '2h2dAsKsQh' or ['2h', '2d', 'As', 'Ks', 'Qh'] - """ - if type(cards) == list: - cards = ''.join(cards) - - assert(len(cards) == 10) # 2 private cards + 3 community cards - return predict_cluster(kmeans_flop, cards) - -def get_turn_cluster_id(kmeans_turn, cards): - """ - kmeans_turn: KMeans classifier - cards: string of cards in the format '2h2dAsKsQh' or ['2h', '2d', 'As', 'Ks', 'Qh'] - """ - if type(cards) == list: - cards = ''.join(cards) - assert(len(cards) == 12) # 2 private cards + 4 community cards - return predict_cluster(kmeans_turn, cards) - -def get_river_cluster_id(kmeans_river, cards): - """ - kmeans_river: KMeans classifier - cards: string of cards in the format '2h2dAsKsQh' or ['2h', '2d', 'As', 'Ks', 'Qh'] - """ - if type(cards) == list: - cards = ''.join(cards) - - assert(len(cards) == 14) # 2 private cards + 5 community cards - return predict_cluster(kmeans_river, cards) - + raw_dataset_filenames = sorted(get_filenames(f"../data/clusters/flop")) + filename = raw_dataset_filenames[-1] # Take the most recently generated dataset + + centroids = joblib.load(f"../data/clusters/flop/{filename}") + kmeans_flop = KMeans(100) + kmeans_flop.cluster_centers_ = centroids + kmeans_flop._n_threads = -1 + + raw_dataset_filenames = sorted(get_filenames(f"../data/clusters/turn")) + filename = raw_dataset_filenames[-1] # Take the most recently generated dataset + centroids = joblib.load(f"../data/clusters/turn/{filename}") + kmeans_turn = KMeans(100) + kmeans_turn.cluster_centers_ = centroids + kmeans_turn._n_threads = -1 + + raw_dataset_filenames = sorted(get_filenames(f"../data/clusters/river")) + filename = raw_dataset_filenames[-1] # Take the most recently generated dataset + centroids = joblib.load(f"../data/clusters/river/{filename}") + kmeans_river = KMeans(100) + kmeans_river.cluster_centers_ = centroids + kmeans_river._n_threads = -1 + + return kmeans_flop, kmeans_turn, kmeans_river + + import argparse from sklearn.cluster import KMeans + if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Generate Poker Hand Abstractions.") - parser.add_argument("-g", "--generate", - action="store_true", dest="generate", default=False, - help="Generate Abstractions.") - parser.add_argument("--n_samples", default=10000, - dest="n_samples", - help="Number of samples to sample from to generate the abstraction.") - parser.add_argument("--n_clusters", default=50, - dest="n_clusters", - help="Number of clusters to generate.") - parser.add_argument("-b", "--bins", default=5, - dest="bins", - help="The granularity of your generated data.") - parser.add_argument("-s", "--stage", default='turn', - dest="stage", - help="Select the stage of the game that you would like to abstract (flop, turn, river).") - # Hyperparamtesrs - args = parser.parse_args() - - generate = args.generate # Generate histogram distributions to cluster on - clustering = True # Cluster these histogram distributions - - stage = args.stage - n_samples = int(args.n_samples) - bins = args.bins - - if generate: - generate_postflop_equity_distributions(n_samples, bins, stage) - - if clustering: - raw_dataset_filenames = sorted(get_filenames(f'../data/raw/{stage}')) - filename = raw_dataset_filenames[-1] # Take the most recently generated dataset to run our clustering on - - equity_distributions = np.load(f'../data/raw/{stage}/{filename}') # TODO: Switch to joblib - print(filename) - if not os.path.exists(f'../data/clusters/{stage}/{filename}'): - print(f"Generating the cluster for the {stage}") - print(filename) - kmeans = KMeans(100) # 100 Clusters seems good using the Elbow Method, see `notebook/abstraction_exploration.ipynb` for more details - kmeans.fit(equity_distributions) # Perform Clustering - centroids = kmeans.cluster_centers_ - joblib.dump(centroids, f'../data/clusters/{stage}/{filename}') - else: # Centroids have already been generated, just load them, which are tensors - centroids = joblib.load(f'../data/clusters/{stage}/{filename}') - # Load KMeans Model - kmeans = KMeans(100) - kmeans.cluster_centers_ = centroids - kmeans._n_threads = -1 - - centroids = joblib.load(f'../data/clusters/{stage}/{filename}') - # Load KMeans Model - - predict = False - - - - - # # Visualization of the hands - # hands = joblib.load(f'data/raw/{stage}/{filename.split(".")[0]}') - # for i in range(equity_distributions.shape[0]): - # hand = hands[i] - # hand = hand.split(' ') - # player_cards = hand[0] - # community_cards = hand[0] - # plot_equity_hist(equity_distributions[i], player_cards, community_cards) - - # Visualize the clusstering - - - - - - - + parser = argparse.ArgumentParser(description="Generate Poker Hand Abstractions.") + parser.add_argument( + "-g", + "--generate", + action="store_true", + dest="generate", + default=False, + help="Generate Abstractions.", + ) + parser.add_argument( + "--n_samples", + default=10000, + dest="n_samples", + help="Number of samples to sample from to generate the abstraction.", + ) + parser.add_argument( + "--n_clusters", default=50, dest="n_clusters", help="Number of clusters to generate." + ) + parser.add_argument( + "-b", "--bins", default=5, dest="bins", help="The granularity of your generated data." + ) + parser.add_argument( + "-s", + "--stage", + default="turn", + dest="stage", + help="Select the stage of the game that you would like to abstract (flop, turn, river).", + ) + # Hyperparamtesrs + args = parser.parse_args() + + generate = args.generate # Generate histogram distributions to cluster on + clustering = True # Cluster these histogram distributions + + stage = args.stage + n_samples = int(args.n_samples) + bins = args.bins + + if generate: + generate_postflop_equity_distributions(n_samples, bins, stage) + + if clustering: + raw_dataset_filenames = sorted(get_filenames(f"../data/raw/{stage}")) + filename = raw_dataset_filenames[ + -1 + ] # Take the most recently generated dataset to run our clustering on + + equity_distributions = np.load(f"../data/raw/{stage}/{filename}") # TODO: Switch to joblib + print(filename) + if not os.path.exists(f"../data/clusters/{stage}/{filename}"): + print(f"Generating the cluster for the {stage}") + print(filename) + kmeans = KMeans( + 100 + ) # 100 Clusters seems good using the Elbow Method, see `notebook/abstraction_exploration.ipynb` for more details + kmeans.fit(equity_distributions) # Perform Clustering + centroids = kmeans.cluster_centers_ + joblib.dump(centroids, f"../data/clusters/{stage}/{filename}") + else: # Centroids have already been generated, just load them, which are tensors + centroids = joblib.load(f"../data/clusters/{stage}/{filename}") + # Load KMeans Model + kmeans = KMeans(100) + kmeans.cluster_centers_ = centroids + kmeans._n_threads = -1 + + centroids = joblib.load(f"../data/clusters/{stage}/{filename}") + # Load KMeans Model + + predict = False + + # # Visualization of the hands + # hands = joblib.load(f'data/raw/{stage}/{filename.split(".")[0]}') + # for i in range(equity_distributions.shape[0]): + # hand = hands[i] + # hand = hand.split(' ') + # player_cards = hand[0] + # community_cards = hand[0] + # plot_equity_hist(equity_distributions[i], player_cards, community_cards) + + # Visualize the clusstering diff --git a/src/base.py b/src/base.py index b56efa6..2fc5ba9 100644 --- a/src/base.py +++ b/src/base.py @@ -100,8 +100,8 @@ class InfoSet: """ - def __init__(self, infoSet: List[Action], actions: List[Action], player: Player): - self.infoSet = infoSet + def __init__(self, infoSet_key: List[Action], actions: List[Action], player: Player): + self.infoSet = infoSet_key self.__actions = actions self.__player = player @@ -161,7 +161,7 @@ def __init__( create_history, n_players: int = 2, iterations: int = 1000000, - tracker_interval=50000, + tracker_interval=1000, ): self.n_players = n_players self.iterations = iterations @@ -340,7 +340,7 @@ def vanilla_cfr_manim( def mccfr( self, history: History, i: Player, t: int, pi_0: float, pi_1: float, debug=False ): # Works for two players - return + raise NotImplementedError() def solve(self, method="vanilla_speedup", debug=False): util_0 = 0 @@ -350,7 +350,7 @@ def solve(self, method="vanilla_speedup", debug=False): for t in tqdm(range(self.iterations), desc="CFR Training Loop"): if method == "vanilla_speedup": - util_0 += self.vanilla_cfr_speedup(self.create_history(), t, 1, 1, debug=debug) + util_0 += self.vanilla_cfr_speedup(self.create_history(t), t, 1, 1, debug=debug) elif method == "manim" and t < 10: for player in range(self.n_players): @@ -384,11 +384,14 @@ def solve(self, method="vanilla_speedup", debug=False): self.tracker(self.infoSets) self.tracker.pprint() + if t % 2500 == 0: + self.export_infoSets(f"infoSets_{t}.joblib") + if method == "manim": return histories - def export_infoSets(self): - joblib.dump(self.infoSets, "holdem_infoSets.joblib") + def export_infoSets(self, filename = "infoSets.joblib"): + joblib.dump(self.infoSets, filename) def get_expected_value( self, history: History, player: Player, player_strategy=None, opp_strategy=None @@ -479,7 +482,6 @@ def get_best_response(self, history: History, player: Player, player_strategy=No The algorithm is the following: 1. For each action that the opponent can play after our decision, we see what happens if our opponent sticks to that strategy. Whatever action our opponent chooses that minimizes our expected value is the one. - """ sample_a = infoSet.actions()[0] sample_opp_history = ( diff --git a/src/environment.py b/src/environment.py index 4317150..03e57e7 100644 --- a/src/environment.py +++ b/src/environment.py @@ -2,6 +2,15 @@ from evaluator import * from typing import List from holdem import HoldEmHistory, HoldemInfoSet # To get the legal actions +from abstraction import predict_cluster_fast +import joblib + + +def load_holdem_infosets(): + print("loading holdem infosets") + global holdem_infosets + holdem_infosets = joblib.load("../src/infoSets_300.joblib") + print("loaded holdem infosets!") class Player: # This is the POV @@ -26,29 +35,26 @@ def clear_hand(self): self.hand = [] def place_bet(self, action: str, observed_env) -> int: - hist: HoldEmHistory = observed_env.history - legal_actions = hist.actions() - if action not in legal_actions: - raise Exception(f"Invalid Action: {action}") - current_game_stage_history, stage = hist.get_current_game_stage_history() + legal_actions = observed_env.infoset.actions() + print("here are your legal actions that the AI can react to", legal_actions) + # make action to nearest number + # ----- BET ABSTRACTION ------ + if action[0] == "b": + closest_action = legal_actions[-1] + for legal_action in legal_actions: + if int(action[1:]) < int(legal_action[1:]): + closest_action = legal_action + break + else: + closest_action = action - if action == "k": # check - if stage == "preflop": - self.current_bet = 2 # BB - else: - self.current_bet = 0 + if closest_action not in legal_actions: + raise Exception(f"Invalid Action: {action}") - elif action == "c": - # If you call on the preflop - if len(hist.history) == 2: - self.current_bet = 2 - else: - self.current_bet = int(hist.history[-1][1:]) + print("closest bet found", closest_action) - elif action[0] == "b": # bet X amount - self.current_bet = int(action[1:]) - return action + return closest_action def calculate_pot_odds( self, @@ -58,6 +64,13 @@ def calculate_pot_odds( """ +import numpy as np + + +def getAction(strategy): + return np.random.choice(strategy.keys(), p=strategy.values()) + + class AIPlayer(Player): def __init__(self, balance) -> None: super().__init__(balance) @@ -67,27 +80,27 @@ def __init__(self, balance) -> None: def place_bet(self, observed_env) -> int: # AI will call every time # Very similar function to Player.place_bet, we only call and check action = "k" - hist: HoldEmHistory = observed_env.history - legal_actions = hist.actions() - if action not in legal_actions: - action = "c" - - if action not in legal_actions: - raise Exception("AI found no legal actions", hist.actions()) + strategy = observed_env.infoset.get_average_strategy() + action = getAction(strategy) + print("AI strategy", strategy) + print("AI action", action) - current_game_stage_history, stage = hist.get_current_game_stage_history() if action == "k": # check - if stage == "preflop": + if observed_env.game_stage == 2: self.current_bet = 2 else: self.current_bet = 0 elif action == "c": # If you call on the preflop - if len(hist.history) == 2: + if observed_env.game_stage == 2: self.current_bet = observed_env.big_blind else: # Set the current bet to the amount of the last bet - self.current_bet = int(hist.history[-1][1:]) + self.current_bet = observed_env.players[ + (observed_env.position_in_play + 1) % 2 + ].current_bet + else: + self.current_bet = int(action[1:]) return action @@ -101,16 +114,20 @@ def __init__(self) -> None: self.players: List[Player] = [] self.deck = Deck() + load_holdem_infosets() + """Game Stages: 1: Starting a new round, giving players their cards. Automatically goes into state 2 2: Preflop betting round. Goes into state 3 once everyone has made their decision - 3: Flop round. Goes into turn (state 4) /ends round (state 6) once everyone " " - 4: Turn round. Goes into river (state 5) /ends round (state 6) once everyone " " - 5: River round. Ends round (state 6) once everyone " " + 3: Flop round. Goes into turn (state 4) /ends round (state 6) once everyone " " + 4: Turn round. Goes into river (state 5) /ends round (state 6) once everyone " " + 5: River round. Ends round (state 6) once everyone " " 6: Round is over. Distribute pot winnings. - + Game Stage - 2 = number of "/" in the holdem infoset and history """ + self.play_as_AI = True # play as the AI (used in video) + self.game_stage = 1 # To keep track of which phase of the game we are at, new_round is 0 # If self.finished_playing_game_stage = True, we can move to the next game state. This is needed to go around each player and await their decision self.finished_playing_game_stage = False @@ -128,9 +145,10 @@ def __init__(self) -> None: self.new_player_balance = 100 self.small_blind = 1 self.big_blind = 2 - self.history: HoldEmHistory = ( - HoldEmHistory() - ) # THis will be the history that will be fed into the AI + # holdem infosets + # use the infosets for the AI to make predictions + self.infoSet_key = [] + self.infoset = None self.players_balance_history = [] # List of "n" list for "n" players @@ -142,6 +160,7 @@ def get_player(self, idx) -> Player: def add_AI_player(self): # Add a dumb AI self.players.append(AIPlayer(self.new_player_balance)) + self.AI_player_idx = len(self.players) - 1 def get_winning_players(self) -> List: # If there is more than one winning player, the pot is split. We assume that we only run things once @@ -214,8 +233,6 @@ def start_new_round(self): self.stage_pot_balance = 0 self.total_pot_balance = 0 - self.history = HoldEmHistory() # Reset the history - # Move the dealer position and assign the new small and big blinds self.dealer_button_position += 1 self.dealer_button_position %= len(self.players) @@ -238,12 +255,15 @@ def start_new_round(self): for _ in range(len(self.players)): position_to_deal %= len(self.players) card_str = "" - for _ in range(2): - card = self.deck.draw() + for i in range(2): + if self.play_as_AI and self.players[position_to_deal].is_AI: + card = Card(input(f"Enter the {i}-th card that was dealt to the AI (ex: Ah): ")) + else: + card = self.deck.draw() + card_str += str(card) self.players[position_to_deal].add_card_to_hand(card) - self.history += card_str position_to_deal += 1 self.finished_playing_game_stage = True @@ -251,7 +271,6 @@ def start_new_round(self): def update_stage_pot_balance(self): """ Assumes the balances from the players are correct - """ self.stage_pot_balance = 0 for player in self.players: @@ -263,7 +282,9 @@ def play_current_stage(self, action: str = ""): action = self.players[self.position_in_play].place_bet( self ) # Pass the Environment as an argument - self.history += action + + self.infoSet_key += [action] + self.infoset = holdem_infosets["".join(self.infoSet_key)] else: # Real player's turn if action == "": # No decision has yet been made @@ -315,18 +336,47 @@ def play_preflop(self): self.finished_playing_game_stage = False + # Assign to cluster + + if self.position_in_play != self.AI_player_idx: # AI doesn't know what the opponent has + self.infoSet_key += ["?"] + self.infoSet_key += [ + predict_cluster_fast( + [str(card) for card in self.players[self.AI_player_idx].hand], + n=3000, + total_clusters=20, + ) + ] + else: + self.infoSet_key += [ + predict_cluster_fast( + [str(card) for card in self.players[self.AI_player_idx].hand], + n=3000, + total_clusters=20, + ) + ] + self.infoSet_key += ["?"] + + self.infoset = holdem_infosets["".join(self.infoSet_key)] + def play_flop(self): # 3. Flop - self.history += "/" + self.infoSet_key += ["/"] self.deck.draw() # We must first burn one card, TODO: Show on video - cards = "" - for _ in range(3): # Draw 3 cards - self.community_cards.append(self.deck.draw()) - cards += str(self.community_cards[-1]) + for i in range(3): # Draw 3 cards + if self.play_as_AI: + card = Card(input(f"Input the {i}-th community card (ex: 'Ah'): ")) + else: + card = self.deck.draw() + + self.community_cards.append(card) + + cards = [str(card) for card in self.community_cards] - self.history += cards + self.infoSet_key += [predict_cluster_fast(cards, n=1000, total_clusters=10)] + self.infoset = holdem_infosets["".join(self.infoSet_key)] # The person that should play is the first person after the dealer position self.position_in_play = self.dealer_button_position @@ -337,12 +387,18 @@ def play_flop(self): def play_turn(self): # 4. Turn - self.history += "/" + self.infoSet_key += ["/"] self.deck.draw() # We must first burn one card, TODO: Show on video - self.community_cards.append(self.deck.draw()) + if self.play_as_AI: + card = Card(input("Input the turn card (ex: '5d'): ")) + else: + card = self.deck.draw() - self.history += str(self.community_cards[-1]) + self.community_cards.append(card) + cards = [str(card) for card in self.community_cards] + self.infoSet_key += [predict_cluster_fast(cards, n=500, total_clusters=5)] + self.infoset = holdem_infosets["".join(self.infoSet_key)] # The person that should play is the first person after the dealer position self.position_in_play = self.dealer_button_position @@ -353,12 +409,18 @@ def play_turn(self): def play_river(self): # 5. River - self.history += "/" + self.infoSet_key += ["/"] self.deck.draw() # We must first burn one card, TODO: Show on video - self.community_cards.append(self.deck.draw()) + if self.play_as_AI: + card = input("Input the river card (ex: '5d'): ") + else: + card = self.deck.draw() - self.history += str(self.community_cards[-1]) + self.community_cards.append(card) + cards = [str(card) for card in self.community_cards] + self.infoSet_key += [predict_cluster_fast(cards, n=200, total_clusters=5)] + self.infoset = holdem_infosets["".join(self.infoSet_key)] self.finished_playing_game_stage = False @@ -413,11 +475,14 @@ def handle_game_stage(self, action=""): self.finished_playing_game_stage = ( False # on the next call of the handler, we will start a new round ) + + print(self.infoSet_key) else: if self.game_stage == 1: # This function was put here instead of at game_stage == 6 to visualize the game self.distribute_pot_to_winning_players() self.start_new_round() + self.handle_game_stage() else: self.play_current_stage(action) diff --git a/src/fast_evaluator.py b/src/fast_evaluator.py index 0386940..114657e 100644 --- a/src/fast_evaluator.py +++ b/src/fast_evaluator.py @@ -5,6 +5,32 @@ import random from phevaluator import evaluate_cards +import numpy as np +import treys + +def phEvaluatorSetup(n): + """ + Sets up n scenarios using the phevaluator library. + """ + deck = [] + + def shuffle_deck(): + deck.clear() + for rank in ["A", "2", "3", "4", "5", "6", "7", "8", "9", "T", "J", "Q", "K"]: + for suit in ["h", "d", "s", "c"]: + deck.append(rank + suit) + random.shuffle(deck) + + boards = [] + player_hands = [] + opponent_hands = [] + for _ in range(n): + shuffle_deck() + boards.append(deck[:5]) + player_hands.append(deck[5:7]) + opponent_hands.append(deck[7:9]) + + return boards, player_hands, opponent_hands def Deck(excluded_cards=[]): diff --git a/src/holdem.py b/src/holdem.py index 713c282..8cb3f6b 100644 --- a/src/holdem.py +++ b/src/holdem.py @@ -1,17 +1,52 @@ import base +import numpy as np from base import Player, Action -import random +from tqdm import tqdm from typing import NewType, Dict, List, Callable, cast import copy from fast_evaluator import Deck -from abstraction import * +from abstraction import ( + get_preflop_cluster_id, + predict_cluster, + predict_cluster_fast, + load_kmeans_classifiers, +) +from fast_evaluator import phEvaluatorSetup, evaluate_cards +import time + + +# ----- GLOBAL VARIABLES Load the pre-generated dataset ----- +def load_dataset(): + global boards, player_hands, opponent_hands + global player_preflop_clusters, player_flop_clusters, player_turn_clusters, player_river_clusters + global opp_preflop_clusters, opp_flop_clusters, opp_turn_clusters, opp_river_clusters + global winners + + # Load the pre-generated dataset + boards = np.load("dataset/boards.npy").tolist() + player_hands = np.load("dataset/player_hands.npy").tolist() + opponent_hands = np.load("dataset/opponent_hands.npy").tolist() + + # Load player clusters + player_preflop_clusters = np.load("dataset/player_preflop_clusters.npy").tolist() + player_flop_clusters = np.load("dataset/player_flop_clusters.npy").tolist() + player_turn_clusters = np.load("dataset/player_turn_clusters.npy").tolist() + player_river_clusters = np.load("dataset/player_river_clusters.npy").tolist() + + # Load opponent clusters + opp_preflop_clusters = np.load("dataset/opp_preflop_clusters.npy").tolist() + opp_flop_clusters = np.load("dataset/opp_flop_clusters.npy").tolist() + opp_turn_clusters = np.load("dataset/opp_turn_clusters.npy").tolist() + opp_river_clusters = np.load("dataset/opp_river_clusters.npy").tolist() + + winners = np.load("dataset/winners.npy") class HoldEmHistory(base.History): """ Example of history: First two actions are the cards dealt to the players. The rest of the actions are the actions taken by the players. - 1. ['AkTh', 'QdKd', 'b2', 'c', '/', 'QhJdKs', 'b2', 'c', '/', 'k', 'k'] + 1. ['AkTh', 'QdKd', 'b2', 'c', '/', 'QhJdKs', 'b2', 'c', '/', 'Kh', 'k', 'k', ...] ---- ACTIONS ---- - k = check @@ -36,98 +71,89 @@ class HoldEmHistory(base.History): I want to avoid all the extra overhead, so taking inspiration from `environment.py` with the `PokerEnvironment` - """ - def __init__(self, history: List[Action] = []): + def __init__(self, history: List[Action] = [], sample_id=0): super().__init__(history) + self.sample_id = sample_id % len(player_hands) + self.stage_i = history.count("/") def is_terminal(self): if len(self.history) == 0: return False folded = self.history[-1] == "f" - is_showdown = ( - self.history.count("/") == 3 and self.history[-1] == "c" + is_showdown = self.history.count("/") == 3 and ( + self.history[-1] == "c" # call + or self.history[-2:] == ["k", "k"] # check,check + or self._get_total_pot_size() == 200 # all-in ) # Showdown, since one of the players is calling if folded or is_showdown: return True else: return False - def get_current_cards(self): - current_cards = [] - new_stage = False - stage_i = 0 - for i, action in enumerate(self.history): - if new_stage: - new_stage = False - if stage_i == 1: # Flop, so there are 3 community cards - assert len(action) == 6 - current_cards.append(action[:2]) # Community card 1 - current_cards.append(action[2:4]) # Community card 2 - current_cards.append(action[4:6]) # Community card 3 - - else: # Turn or river - current_cards.append(action) # Community card - elif action == "/": - new_stage = True - stage_i += 1 - - elif i == 0 or i == 1: - assert len(action) == 4 - current_cards.append(action[:2]) # Private card 1 - current_cards.append(action[2:4]) # Private card 2 - - return current_cards - - def get_current_game_stage_history(self): - """ - return current_game_stage_history, stages[stage_i] excluding the community cards drawn. We only care about the actions - of the players. - """ - game_stage_start = 2 # Because we are skipping the pairs of private cards drawn at the beginning of the round - stage_i = 0 - stages = ["preflop", "flop", "turn", "river"] - for i, action in enumerate(self.history): - if action == "/": - game_stage_start = i + 2 # Skip the community card - stage_i += 1 - - if game_stage_start >= len(self.history): - return [], stages[stage_i] - else: - current_game_stage_history = self.history[game_stage_start:] - return current_game_stage_history, stages[stage_i] - def actions(self): - if self.is_chance(): + if self.is_chance(): # draw cards if len(self.history) > 2 and self.history[-1] != "/": return ["/"] else: - cards_to_exclude = self.get_current_cards() - cards = Deck(cards_to_exclude) - return cards + # cards_to_exclude = self._get_current_cards() + # cards = Deck(cards_to_exclude) + # return cards + return ( + [] + ) # This should return the entire deck with current cards removed, but I do this for speedup by loading an existing dataset elif not self.is_terminal(): assert ( - not self.game_stage_ended() + not self._game_stage_ended() ) # game_stage_ended would mean that it is a chance node """ - To limit this game going to infinity, I only allow for 3 betting rounds. - I.e. if I bet, you raise, I raise, you raise, then I must either call, fold, or all-in. Else the branching factor is going to be insane. - """ + To limit this game going to infinity, I only allow for 3 betting rounds. + I.e. if I bet, you raise, I raise, you raise, then I must either call, fold, or all-in. Else the branching factor is going to be insane. + """ actions = ["k", "c", "f"] player = self.player() - remaining_amount = self.get_remaining_balance(player) - min_bet = self.get_min_bet() - - for bet_size in range( - min_bet, remaining_amount + 1 - ): # These define the legal actions of the game - actions.append("b" + str(bet_size)) + remaining_amount = self._get_remaining_balance( + player, include_curr_stage=False + ) # this is how much they can put in this round + opp_remaining_amount = self._get_remaining_balance( + (player + 1) % 2, include_curr_stage=True + ) # this is how much the opponent can put in this round + pot_size = self._get_total_pot_size() + min_bet = self._get_min_bet() + + if opp_remaining_amount == 0 and remaining_amount > 0: + return ["c", "f"] + + current_game_stage_history, stage = self._get_current_game_stage_history() + + # ------ BET ABSTRACTION ------ + # doing the abstraction here because the number of actions is too large, potential raise values + # for preflop, 4 choices: check, call, 2x pot, all-in, fold + # for flop 5 choices: check, 1/2 pot, pot, 2x pot, all-in, fold + # For turn, 5 choices: check, 1/2 pot, pot, 2x pot, all-in, fold + # For river, 5 choices: check, 1/2 pot, pot, 2x pot, all-in, fold + + # Abstract away the actions, since there are too many of them + # history = infoSet_key + + if len(current_game_stage_history) > 3: # prevent the game from going on forever + return ["c", "f", "b" + str(remaining_amount)] # all-in + + if ( + stage != "preflop" + and int(0.5 * pot_size) < remaining_amount + and int(0.5 * pot_size) >= min_bet + ): # 1/2 pot + actions.append("b" + str(int(0.5 * pot_size))) + if stage != "preflop" and pot_size < remaining_amount: # pot + actions.append("b" + str(pot_size)) + if 2 * pot_size < remaining_amount: # 2x pot + actions.append("b" + str(2 * pot_size)) + actions.append("b" + str(remaining_amount)) # all-in - current_game_stage_history, stage = self.get_current_game_stage_history() # Pre-flop if stage == "preflop": # Small blind to act @@ -141,8 +167,9 @@ def actions(self): elif len(current_game_stage_history) == 1: # 2-bet if ( current_game_stage_history[0] == "c" - ): # Small blind called, you don't need to fold + ): # Small blind called, you don't need to fold, but you also can't call actions.remove("f") + actions.remove("c") return actions else: # Other player has bet, so you cannot check actions.remove("k") @@ -160,9 +187,11 @@ def actions(self): else: # flop, turn, river if len(current_game_stage_history) == 0: actions.remove("f") # You cannot fold + actions.remove("c") # You cannot call elif len(current_game_stage_history) == 1: if current_game_stage_history[0] == "k": actions.remove("f") + actions.remove("c") else: # Opponent has bet, so you cannot check actions.remove("k") else: @@ -172,7 +201,112 @@ def actions(self): else: raise Exception("Cannot call actions on a terminal history") - def get_min_bet(self): + def player(self): + """ + This part is confusing for heads-up no limit poker, because the player that acts first changes: + The Small Blind (SB) acts first pre-flop, but the Big Blind (BB) acts first post-flop. (see https://en.wikipedia.org/wiki/Texas_hold_%27em) + 1. ['AkTh', 'QdKd', 'b2', 'c', '/', 'Qh', 'b2', 'c', '/', '2d', b2', 'f'] + SB BB BB SB BB SB + """ + if len(self.history) <= 1: + return -1 + elif self._game_stage_ended(): + return -1 + elif self.history[-1] == "/": + return -1 + else: + if "/" in self.history: + return (len(self.history) + 1) % 2 # Order is flipped post-flop + else: + return len(self.history) % 2 + + def is_chance(self): + return super().is_chance() + + def sample_chance_outcome(self): + assert self.is_chance() + + # slow way, sampling manually without abstractions + # cards = self.actions() # Will be either or cards not seen in the deck or ['/'] + # if len(self.history) <= 1: # We need to deal two cards to each player + # cards = random.sample(cards, 2) + # return "".join(cards) + # else: + # return random.choice(cards) # Sample one of the community cards with equal probability + + if len(self.history) == 0: + return "".join(player_hands[self.sample_id]) + elif len(self.history) == 1: + return "".join(opponent_hands[self.sample_id]) + elif self.history[-1] != "/": + return "/" + elif self.stage_i == 1: + return "".join(boards[self.sample_id][:3]) + elif self.stage_i == 2: + return boards[self.sample_id][3] + elif self.stage_i == 3: + return boards[self.sample_id][4] + + def terminal_utility(self, i: Player) -> int: + assert self.is_terminal() # We can only call the utility for a terminal history + assert i in [0, 1] # Only works for 2 player games for now + winner = winners[self.sample_id] + if winner == 0: # tie + return 0 + + pot_size = self._get_total_pot_size() + + if winner == i: + return pot_size + else: + return -pot_size + + def _get_current_cards(self): + current_cards = [] + new_stage = False + stage_i = 0 + for i, action in enumerate(self.history): + if new_stage: + new_stage = False + if stage_i == 1: # Flop, so there are 3 community cards + assert len(action) == 6 + current_cards.append(action[:2]) # Community card 1 + current_cards.append(action[2:4]) # Community card 2 + current_cards.append(action[4:6]) # Community card 3 + + else: # Turn or river + current_cards.append(action) # Community card + elif action == "/": + new_stage = True + stage_i += 1 + + elif i == 0 or i == 1: + assert len(action) == 4 + current_cards.append(action[:2]) # Private card 1 + current_cards.append(action[2:4]) # Private card 2 + + return current_cards + + def _get_current_game_stage_history(self): + """ + return current_game_stage_history, stages[stage_i] excluding the community cards drawn. We only care about the actions + of the players. + """ + game_stage_start = 2 # Because we are skipping the pairs of private cards drawn at the beginning of the round + stage_i = 0 + stages = ["preflop", "flop", "turn", "river"] + for i, action in enumerate(self.history): + if action == "/": + game_stage_start = i + 2 # Skip the community card + stage_i += 1 + + if game_stage_start >= len(self.history): + return [], stages[stage_i] + else: + current_game_stage_history = self.history[game_stage_start:] + return current_game_stage_history, stages[stage_i] + + def _get_min_bet(self): # TODO: Test this function curr_bet = 0 prev_bet = 0 @@ -186,7 +320,7 @@ def get_min_bet(self): break # Handle case when game stage is preflop, in which case a bet is already placed for you - game_stage_history, game_stage = self.get_current_game_stage_history() + game_stage_history, game_stage = self._get_current_game_stage_history() if game_stage == "preflop" and curr_bet == 0: curr_bet = 2 # big blind elif curr_bet == 0: # No bets has been placed @@ -195,13 +329,23 @@ def get_min_bet(self): return int(curr_bet + (curr_bet - prev_bet)) # This is the minimum raise - def calculate_player_total_up_to_game_stage(self, player: Player): + def _calculate_player_total(self, player: Player, include_curr_stage=False): + """ + This is the amount of money a player has put into the pot, INCLUDING the current game stage. + + In preflop, this is 0. + """ stage_i = 0 - player_total = 0 # Total across all game stages (preflop, flop, turn, river) - player_game_stage_total = 0 # Total for a given game stage + # Total across all game stages (preflop, flop, turn, river) + # initial values for big and small blind + player_total = 0 + if player == 0: + player_game_stage_total = 1 + else: + player_game_stage_total = 2 + i = 0 for hist_idx, hist in enumerate(self.history): - i = (i + 1) % 2 if i == player: if hist[0] == "b": player_game_stage_total = int(hist[1:]) @@ -217,6 +361,8 @@ def calculate_player_total_up_to_game_stage(self, player: Player): else: player_game_stage_total = int(self.history[hist_idx - 1][1:]) + i = (i + 1) % 2 + if hist == "/": stage_i += 1 player_total += player_game_stage_total @@ -226,15 +372,42 @@ def calculate_player_total_up_to_game_stage(self, player: Player): i + 1 ) % 2 # We need to flip the order post-flop, as the BB is the one who acts first now + if include_curr_stage: + player_total += player_game_stage_total return player_total - def get_remaining_balance(self, player: Player): + def _get_remaining_balance(self, player: Player, include_curr_stage=False): # Each player starts with a balance of 100 at the beginning of each hand - return 100 - self.calculate_player_total_up_to_game_stage(player) + return 100 - self._calculate_player_total(player, include_curr_stage=include_curr_stage) + + def _get_stage_pot_size(self): + game_stage_history, stage = self._get_current_game_stage_history() + max_player_bet = 0 + max_opp_bet = 0 + if stage == "preflop": + max_player_bet = 1 + max_opp_bet = 2 + if "c" in game_stage_history: + max_player_bet = 2 + + for i, action in enumerate(game_stage_history): + if action[0] == "b": + if i % 2 == 0: + max_player_bet = int(action[1:]) + else: + max_opp_bet = int(action[1:]) + + pot_size = max_player_bet + max_opp_bet + return pot_size + + def _get_total_pot_size(self): + return +self._calculate_player_total( + 0, include_curr_stage=True + ) + self._calculate_player_total(1, include_curr_stage=True) - def game_stage_ended(self): + def _game_stage_ended(self): # TODO: Make sure this logic is good - current_game_stage_history, stage = self.get_current_game_stage_history() + current_game_stage_history, stage = self._get_current_game_stage_history() if len(current_game_stage_history) == 0: return False elif current_game_stage_history[-1] == "f": @@ -243,136 +416,45 @@ def game_stage_ended(self): current_game_stage_history[-1] == "c" and len(self.history) > 3 ): # On pre-flop, when the small blind calls, the opponent can still bet return True - elif len(current_game_stage_history) >= 2 and current_game_stage_history[-2:] == ["k", "k"]: + elif stage == "preflop" and current_game_stage_history[-2:] == ["c", "k"]: + return True + elif len(current_game_stage_history) >= 2 and current_game_stage_history[-2:] == [ + "k", + "k", + ]: # check, check return True else: + # both players all-in, with a call (IMPORTANT ASSUMPTION: both players starting balance is the same) + remaining_amount = self._get_remaining_balance(0, include_curr_stage=True) + opp_remaining_amount = self._get_remaining_balance(1, include_curr_stage=True) + if remaining_amount == 0 and opp_remaining_amount == 0: + return True return False - def player(self): - """ - This part is confusing for heads-up no limit poker, because the player that acts first changes: - The Small Blind (SB) acts first pre-flop, but the Big Blind (BB) acts first post-flop. - 1. ['AkTh', 'QdKd', 'b2', 'c', '/', 'Qh', 'b2', 'c', '/', '2d', b2', 'f'] - SB BB BB SB BB SB - """ - if len(self.history) <= 1: - return -1 - elif self.game_stage_ended(): - return -1 - elif self.history[-1] == "/": - return -1 - else: - if "/" in self.history: - return (len(self.history) + 1) % 2 # Order is flipped post-flop - else: - return len(self.history) % 2 - - def is_chance(self): - return super().is_chance() - - def sample_chance_outcome(self): - assert self.is_chance() - - cards = self.actions() # Will be either or cards not seen in the deck or ['/'] - - if len(self.history) <= 1: # We need to deal two cards to each player - cards = random.sample(cards, 2) - return "".join(cards) - else: - return random.choice(cards) # Sample one of the community cards with equal probability - - def terminal_utility(self, i: Player) -> int: - assert self.is_terminal() # We can only call the utility for a terminal history - assert i in [0, 1] # Only works for 2 player games for now - - actions = ["k", "b1", "b2", "b4", "b8", "all-in", "c", "f"] - pot_size = 0 - # These represent the bets in the current game stage, i.e. pre-flop, flop, turn, river - prev_bet = 1 # small blind starting value - curr_bet = 2 # big blind starting value - for i, action in enumerate(self.history): - if action == "/": # Move on to next stage - assert curr_bet == prev_bet and curr_bet == 0 - pot_size += curr_bet - prev_bet = 0 - - if action not in actions: - continue - - if action == "k": - assert curr_bet == prev_bet and curr_bet == 0 - - elif action == "b1": - assert curr_bet == 0 - curr_bet = 1 - - elif action == "b2": - if curr_bet == 0: - assert prev_bet == 0 - curr_bet = 2 - else: - prev_bet = curr_bet - curr_bet *= 2 - elif action == "b4": - if curr_bet == 0: - assert prev_bet == 0 - curr_bet = 4 - else: - prev_bet = curr_bet - curr_bet *= 4 - elif action == "b8": - if curr_bet == 0: - assert prev_bet == 0 - curr_bet = 8 - else: - prev_bet == curr_bet - curr_bet *= 8 - - elif action == "all-in": - curr_bet = 100 - pot_size - curr_bet # Maximum, since each player has 100 chips - - elif action == "c": - assert curr_bet != 0 - pot_size += 2 * curr_bet - curr_bet = 0 - prev_bet = 0 - - elif action == "f": - assert i == len(self.history) - 1 # Folding should be the last action - - pot_size += prev_bet - pot_size += curr_bet - - else: - raise Exception("Action not recognized") - - # Now that we know how much we won from the pot, we also we to calculate how much we made ourselves - def __add__(self, action: Action): - new_history = HoldEmHistory(self.history + [action]) + new_history = HoldEmHistory(self.history + [action], self.sample_id) return new_history - def get_infoSet_key(self, kmeans_flop, kmeans_turn, kmeans_river) -> List[Action]: + def get_infoSet_key(self) -> List[Action]: + """ + This is where we abstract away cards and bet sizes. + """ assert not self.is_chance() assert not self.is_terminal() player = self.player() history = copy.deepcopy(self.history) - print(history) - # ----- Assign cluster ID for PREFLOP ----- - player_cards = [] + # ------- CARD ABSTRACTION ------- + # Assign cluster ID for PREFLOP cards if player == 0: - player_cards = history[0] - history[0] = get_preflop_cluster_id(history[0]) + history[0] = str(player_preflop_clusters[self.sample_id]) history[1] = "?" else: - player_cards = history[1] history[0] = "?" - history[1] = get_preflop_cluster_id(history[1]) + history[1] = str(opp_preflop_clusters[self.sample_id]) - # ----- Assign cluster ID for FLOP/TURN/RIVER ----- - community_cards = "" + # Assign cluster ID for FLOP/TURN/RIVER new_stage = False stage_i = 0 for i, action in enumerate(history): @@ -380,17 +462,23 @@ def get_infoSet_key(self, kmeans_flop, kmeans_turn, kmeans_river) -> List[Action new_stage = False if stage_i == 1: assert len(action) == 6 - community_cards += action - history[i] = get_flop_cluster_id(kmeans_flop, player_cards + community_cards) + if player == 0: + history[i] = str(player_flop_clusters[self.sample_id]) + else: + history[i] = str(opp_flop_clusters[self.sample_id]) elif stage_i == 2: assert len(action) == 2 - community_cards += action - history[i] = get_turn_cluster_id(kmeans_turn, player_cards + community_cards) + if player == 0: + history[i] = str(player_turn_clusters[self.sample_id]) + else: + history[i] = str(opp_turn_clusters[self.sample_id]) elif stage_i == 3: assert len(action) == 2 - community_cards += action - history[i] = get_river_cluster_id(kmeans_river, player_cards + community_cards) + if player == 0: + history[i] = str(player_river_clusters[self.sample_id]) + else: + history[i] = str(opp_river_clusters[self.sample_id]) elif action == "/": new_stage = True stage_i += 1 @@ -414,9 +502,10 @@ class HoldemInfoSet(base.InfoSet): """ - def __init__(self, infoSet: List[Action], actions: List[Action], player: Player): - assert len(infoSet) >= 2 - super().__init__(infoSet, actions, player) + def __init__(self, infoSet_key: List[Action], actions: List[Action], player: Player): + assert len(infoSet_key) >= 2 + abstracted_actions = copy.deepcopy(actions) + super().__init__(infoSet_key, abstracted_actions, player) def create_infoSet(infoSet_key: List[Action], actions: List[Action], player: Player): @@ -426,59 +515,155 @@ def create_infoSet(infoSet_key: List[Action], actions: List[Action], player: Pla return HoldemInfoSet(infoSet_key, actions, player) -def create_history(): - return HoldEmHistory() +def create_history(sample_id): + return HoldEmHistory(sample_id=sample_id) -# CFR with abstraction integrated -class HoldemAbstractCFR(base.CFR): +class HoldemCFR(base.CFR): def __init__( self, create_infoSet, create_history, - kmeans_flop, - kmeans_turn, - kmeans_river, n_players: int = 2, iterations: int = 1000000, ): super().__init__(create_infoSet, create_history, n_players, iterations) -if __name__ == "__main__": - kmeans_flop, kmeans_turn, kmeans_river = load_kmeans_classifiers() - # cfr = HoldemAbstractCFR(create_infoSet, create_history) - # cfr.solve() +from joblib import Parallel, delayed + + +def evaluate_winner(board, player_hand, opponent_hand): + p1_score = evaluate_cards(*(board + player_hand)) + p2_score = evaluate_cards(*(board + opponent_hand)) + if p1_score < p2_score: + return 1 + elif p1_score > p2_score: + return -1 + else: + return 0 + +def generate_dataset(iterations=1000, num_samples=10000, save=True): + """ + To make things faster, we pre-generate the boards and hands. We also pre-cluster the hands """ - When we work with these abstractions, we have two types: - 1. Action Abstraction - 2. Card Abstraction - - Both of these are implemented in a different way. - - """ - - hist: HoldEmHistory = create_history() - assert hist.player() == -1 - hist1 = hist + "AkTh" - assert hist1.player() == -1 - hist2 = hist1 + "QdKd" - assert hist2.player() == 0 - print(hist2.get_infoSet_key(kmeans_flop, kmeans_turn, kmeans_river)) - hist3 = hist2 + "b2" - assert hist3.player() == 1 - hist4 = hist3 + "c" - assert hist4.player() == -1 - # Below are chance events, so it doesn't matter which player it is - hist5 = hist4 + "/" - assert hist5.player() == -1 - hist6 = hist5 + "QhKsKh" - assert hist6.player() == 1 - hist7 = hist6 + "b1" - hist8: HoldEmHistory = hist7 + "b3" + boards, player_hands, opponent_hands = phEvaluatorSetup(num_samples) + + np_boards = np.array(boards) + np_player_hands = np.array(player_hands) + np_opponent_hands = np.array(opponent_hands) + + player_flop_cards = np.concatenate((np_player_hands, np_boards[:, :3]), axis=1).tolist() + player_turn_cards = np.concatenate((np_player_hands, np_boards[:, :4]), axis=1).tolist() + player_river_cards = np.concatenate((np_player_hands, np_boards), axis=1).tolist() + opp_flop_cards = np.concatenate((np_opponent_hands, np_boards[:, :3]), axis=1).tolist() + opp_turn_cards = np.concatenate((np_opponent_hands, np_boards[:, :4]), axis=1).tolist() + opp_river_cards = np.concatenate((np_opponent_hands, np_boards), axis=1).tolist() + curr = time.time() - print(hist8.get_infoSet_key(kmeans_flop, kmeans_turn, kmeans_river), time.time() - curr) + print("generating clusters") + + # player_preflop_clusters = Parallel(n_jobs=-1)( + # delayed(get_preflop_cluster_id)(cards) for cards in player_hands + # ) + player_preflop_clusters = Parallel(n_jobs=-1)( + delayed(predict_cluster_fast)(cards, n=3000, total_clusters=20) + for cards in tqdm(player_hands) + ) + player_flop_clusters = Parallel(n_jobs=-1)( + delayed(predict_cluster_fast)(cards, n=1000, total_clusters=10) + for cards in tqdm(player_flop_cards) + ) + player_turn_clusters = Parallel(n_jobs=-1)( + delayed(predict_cluster_fast)(cards, n=500, total_clusters=5) + for cards in tqdm(player_turn_cards) + ) + player_river_clusters = Parallel(n_jobs=-1)( + delayed(predict_cluster_fast)(cards, n=200, total_clusters=5) + for cards in tqdm(player_river_cards) + ) + + opp_preflop_clusters = Parallel(n_jobs=-1)( + delayed(predict_cluster_fast)(cards, n=3000, total_clusters=20) + for cards in tqdm(opponent_hands) + ) + opp_flop_clusters = Parallel(n_jobs=-1)( + delayed(predict_cluster_fast)(cards, n=1000, total_clusters=10) + for cards in tqdm(opp_flop_cards) + ) + opp_turn_clusters = Parallel(n_jobs=-1)( + delayed(predict_cluster_fast)(cards, n=500, total_clusters=5) + for cards in tqdm(opp_turn_cards) + ) + opp_river_clusters = Parallel(n_jobs=-1)( + delayed(predict_cluster_fast)(cards, n=200, total_clusters=5) + for cards in tqdm(opp_river_cards) + ) + + winners = Parallel(n_jobs=-1)( + delayed(evaluate_winner)(board, player_hand, opponent_hand) + for board, player_hand, opponent_hand in tqdm(zip(boards, player_hands, opponent_hands)) + ) + + print("saving datasets") + np.save("dataset/boards.npy", boards) + np.save("dataset/player_hands.npy", player_hands) + np.save("dataset/opponent_hands.npy", opponent_hands) + np.save("dataset/winners.npy", winners) + print("continuing to save datasets") + + np.save("dataset/player_preflop_clusters.npy", player_preflop_clusters) + np.save("dataset/player_flop_clusters.npy", player_flop_clusters) + np.save("dataset/player_turn_clusters.npy", player_turn_clusters) + np.save("dataset/player_river_clusters.npy", player_river_clusters) + + np.save("dataset/opp_preflop_clusters.npy", opp_preflop_clusters) + np.save("dataset/opp_flop_clusters.npy", opp_flop_clusters) + np.save("dataset/opp_turn_clusters.npy", opp_turn_clusters) + np.save("dataset/opp_river_clusters.npy", opp_river_clusters) + + print(time.time() - curr) + + +import joblib - # cfr = base.CFR(create_infoSet, create_history) - # cfr.solve() +if __name__ == "__main__": + # generate_dataset() + load_dataset() + cfr = HoldemCFR(create_infoSet, create_history) + # cfr.infoSets = joblib.load("infosets.joblib") + cfr.solve() + +# """ +# When we work with these abstractions, we have two types: +# 1. Action Abstraction +# 2. Card Abstraction + +# Both of these are implemented in a different way. + +# """ + +# hist: HoldEmHistory = create_history() +# assert hist.player() == -1 +# hist1 = hist + "AkTh" +# assert hist1.player() == -1 +# hist2 = hist1 + "QdKd" +# assert hist2.player() == 0 +# print(hist2.get_infoSet_key(kmeans_flop, kmeans_turn, kmeans_river)) +# hist3 = hist2 + "b2" +# assert hist3.player() == 1 +# hist4 = hist3 + "c" +# assert hist4.player() == -1 +# # Below are chance events, so it doesn't matter which player it is +# hist5 = hist4 + "/" +# assert hist5.player() == -1 +# hist6 = hist5 + "QhKsKh" +# assert hist6.player() == 1 +# hist7 = hist6 + "b1" +# hist8: HoldEmHistory = hist7 + "b3" +# curr = time.time() +# print(hist8.get_infoSet_key(kmeans_flop, kmeans_turn, kmeans_river), time.time() - curr) + +# # cfr = base.CFR(create_infoSet, create_history) +# # cfr.solve() diff --git a/src/kuhn.py b/src/kuhn.py index d2e42ca..cf80e27 100644 --- a/src/kuhn.py +++ b/src/kuhn.py @@ -116,9 +116,9 @@ class KuhnInfoSet(base.InfoSet): """ - def __init__(self, infoSet: List[Action], actions: List[Action], player: Player): + def __init__(self, infoSet_key: List[Action], actions: List[Action], player: Player): assert len(infoSet) >= 2 - super().__init__(infoSet, actions, player) + super().__init__(infoSet_key, actions, player) def create_infoSet(infoSet_key: List[Action], actions: List[Action], player: Player):