Skip to content

Commit

Permalink
one bot to train for both players
Browse files Browse the repository at this point in the history
  • Loading branch information
r3w0p committed Oct 13, 2024
1 parent 48bd4b4 commit 4e61078
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 61 deletions.
9 changes: 0 additions & 9 deletions include/caravan/core/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ typedef struct Card {
Rank rank{};
} Card;

typedef std::array<Card, HAND_SIZE_MAX_START> Hand;
typedef std::vector<Card> Deck;
typedef std::array<Card, TRACK_FACE_MAX> Faces;
typedef std::array<CaravanName, 3> PlayerCaravanNames;
Expand Down Expand Up @@ -127,14 +126,6 @@ typedef struct GameCommand {
Card board{};
} GameCommand;

typedef struct TrainConfig {
float discount{0.0};
float explore{0.0};
float learning{0.0};
uint32_t episode_max{0};
uint32_t episode{0};
} TrainConfig;

/*
* FUNCTIONS
*/
Expand Down
27 changes: 27 additions & 0 deletions include/caravan/core/training.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright (c) 2022-2024 r3w0p
// The following code can be redistributed and/or
// modified under the terms of the GPL-3.0 License.

#ifndef CARAVAN_CORE_TRAINING_H
#define CARAVAN_CORE_TRAINING_H

#include <cstdint>
#include <array>
#include <map>
#include <vector>
#include <string>

// Number of uint8_t slots in an encoded game state.
// NOTE(review): a size of 1 looks like a placeholder - confirm the intended
// size once the state encoding is implemented.
const uint16_t SIZE_GAME_STATE = 1;

// Fixed-size encoding of a game state: SIZE_GAME_STATE bytes.
typedef std::array<uint8_t, SIZE_GAME_STATE> GameState;

// Q-table: for each encoded game state, a map from a string key to a
// uint8_t value. Requires <map>, which this header includes itself so that
// it compiles regardless of what the including file has pulled in.
// NOTE(review): presumably the string key is a move/action string and the
// value is its learned score; an unsigned 8-bit value cannot hold negative
// or fractional scores - confirm the value type.
typedef std::map<GameState, std::map<std::string, uint8_t>> QTable;

// Parameters for a training run. Every field defaults to zero.
// NOTE(review): discount / explore / learning are presumably the usual
// Q-learning discount factor, exploration rate and learning rate - confirm.
struct TrainConfig {
    float discount = 0.0f;
    float explore = 0.0f;
    float learning = 0.0f;
    // Last episode number to run.
    uint32_t episode_max = 0;
    // Current episode number.
    uint32_t episode = 0;
};

#endif //CARAVAN_CORE_TRAINING_H
5 changes: 0 additions & 5 deletions include/caravan/user/bot/ai.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,10 @@
#include "caravan/user/user.h"

class UserBotAI : public UserBot {
protected:
int policy; // TODO typedef array

public:
explicit UserBotAI(PlayerName pn);

std::string request_move(Game *game) override;

void get_policy(); // TODO return policy
};

#endif //CARAVAN_USER_BOT_AI_H
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,21 @@
// The following code can be redistributed and/or
// modified under the terms of the GPL-3.0 License.

#ifndef CARAVAN_USER_BOT_AI_TRAIN_H
#define CARAVAN_USER_BOT_AI_TRAIN_H
#ifndef CARAVAN_USER_BOT_TRAIN_H
#define CARAVAN_USER_BOT_TRAIN_H

#include "caravan/user/bot/ai.h"
#include "caravan/core/training.h"

class UserBotAITrain : public UserBotAI {
class UserBotTrain : public UserBot {
protected:
QTable q_table;

GameState get_game_state(Game *game, PlayerName pname);
public:
explicit UserBotAITrain(PlayerName pn);
explicit UserBotTrain();

std::string make_move_train(Game *game, TrainConfig *tc);
};

#endif //CARAVAN_USER_BOT_AI_TRAIN_H
#endif //CARAVAN_USER_BOT_TRAIN_H
31 changes: 6 additions & 25 deletions src/caravan/train.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,14 @@
#include <vector>
#include <algorithm>
#include "cxxopts.hpp"
#include "caravan/user/bot/ai_train.h"
#include "caravan/user/bot/train.h"

const uint8_t FIRST_ABC = 1;
const uint8_t FIRST_DEF = 2;

int main(int argc, char *argv[]) {
UserBotAITrain *user_abc;
UserBotAITrain *user_def;
UserBotAITrain *user_turn;
UserBotTrain *user_train;
PlayerName player_turn;
Game *game;
GameConfig gc;
TrainConfig tc;
Expand Down Expand Up @@ -51,14 +50,13 @@ int main(int argc, char *argv[]) {
.episode = 1
};

user_abc = new UserBotAITrain(PLAYER_ABC);
user_def = new UserBotAITrain(PLAYER_DEF);
// Single bot plays as both players and is trained on both.
user_train = new UserBotTrain();

for(; tc.episode <= tc.episode_max; tc.episode++) {
// Random first player
rand_first = distr_first(gen);
gc.player_first = rand_first == FIRST_ABC ? PLAYER_ABC : PLAYER_DEF;
user_turn = rand_first == FIRST_ABC ? user_abc : user_def;

// Set training parameters
tc.discount = discount;
Expand All @@ -70,26 +68,9 @@ int main(int argc, char *argv[]) {

// Take turns until a winner is declared
while(game->get_winner() != NO_PLAYER) {
// TODO borrow logic from other bot to determine if move is
// valid or not; use this to narrow down possible moves that
// can be made per game state; any issues with move when
// passing to game should result in fatal exception
user_turn->make_move_train(game, &tc);

// TODO convert string move to command: take functions to do
// this out of view tui; have single function to make this
// conversion and then pass it to the game

if(user_turn->get_name() == PLAYER_ABC) {
user_turn = user_def;
} else {
user_turn = user_abc;
}
user_train->make_move_train(game, &tc);
}

// TODO reward winner here?
// (or maybe bots should figure that out for themselves...)

// Finish game
game->close();
delete game;
Expand Down
4 changes: 2 additions & 2 deletions src/caravan/user/bot/ai.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
#include "caravan/user/bot/ai.h"

UserBotAI::UserBotAI(PlayerName pn) : UserBot(pn) {
// TODO set trained policy
// TODO load trained q_table
}

std::string UserBotAI::request_move(Game *game) {
if (closed) { throw CaravanFatalException("Bot is closed."); }

return "D1"; // TODO
return "D1"; // TODO use q_table
}
15 changes: 0 additions & 15 deletions src/caravan/user/bot/ai_train.cpp

This file was deleted.

57 changes: 57 additions & 0 deletions src/caravan/user/bot/train.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright (c) 2022-2024 r3w0p
// The following code can be redistributed and/or
// modified under the terms of the GPL-3.0 License.

#include <array>
#include <map>
#include "caravan/user/bot/train.h"
#include "caravan/core/training.h"

UserBotTrain::UserBotTrain() : UserBot(NO_PLAYER) {
// Empty by default and populated during training.
q_table = {};
}

// Produces the encoded game state for the given game and player.
//
// The encoding itself is not implemented yet; the planned layout is recorded
// below. Until it exists, a zero-filled GameState is returned so that the
// function always returns a value (flowing off the end of a non-void
// function is undefined behaviour).
//
// game:  the game to read the state from (currently unused).
// pname: the player the state is built for (currently unused).
// Returns a GameState with every slot set to 0.
GameState UserBotTrain::get_game_state(Game *game, PlayerName pname) {
    // Planned encoding:
    //
    // caravans A to F...
    //   get numerals from 1-8
    //   for each numeral, get face cards attached to it
    //   {AH, KC, 0, 0} x 8 (all ints, with 0 for any empty slot)
    //
    // my hand...
    //   ordered list of cards in hand
    //   {AC, 2D, 8S, 8S, JO, 0, 0, 0} (all ints, with 0 for empty slot)
    //
    // player indicator...
    //   0 = player abc
    //   1 = player def
    //
    // total space needed for game state
    //   32 per caravan, 6 caravans = 192
    //   8 for hand
    //   1 for player
    //   total = 201

    // TODO encode the state as described above.
    GameState state{};
    return state;
}

// Makes one training move for whichever player's turn it is.
//
// The learning steps are still TODO; for now the function only checks that
// the bot is open, reads the current player from the game and returns a
// fixed move string.
//
// game: the game being played.
// tc:   the training configuration (not read yet).
// Returns the move as a string (currently always "D1").
// Throws CaravanFatalException if the bot is closed.
std::string UserBotTrain::make_move_train(Game *game, TrainConfig *tc) {
    if (closed) {
        throw CaravanFatalException("Bot is closed.");
    }

    // Player whose turn it is, as reported by the game.
    const PlayerName current_player = game->get_player_turn();

    // Read the game state and add it to the q-table if not yet present:
    //   game_state = get_game_state()
    //   if !q_table.contains(game_state) ...
    //     for each action state, set to 0

    // TODO choose an action
    //   need to determine which actions are available based on current hand

    // TODO perform action

    // TODO measure reward (1 = win, -1 = loss, 0 = neither)

    // TODO update q_table
    //   if a winner: +1 for winning player, -1 for losing player

    return "D1";  // TODO
}

0 comments on commit 4e61078

Please sign in to comment.