one bot to train for both players

r3w0p · Oct 13, 2024 · 4e61078 · 4e61078
1 parent 48bd4b4
commit 4e61078
Show file tree

Hide file tree

Showing 8 changed files with 101 additions and 61 deletions.
diff --git a/include/caravan/core/common.h b/include/caravan/core/common.h
@@ -93,7 +93,6 @@ typedef struct Card {
     Rank rank{};
 } Card;
 
-typedef std::array<Card, HAND_SIZE_MAX_START> Hand;
 typedef std::vector<Card> Deck;
 typedef std::array<Card, TRACK_FACE_MAX> Faces;
 typedef std::array<CaravanName, 3> PlayerCaravanNames;
@@ -127,14 +126,6 @@ typedef struct GameCommand {
     Card board{};
 } GameCommand;
 
-typedef struct TrainConfig {
-    float discount{0.0};
-    float explore{0.0};
-    float learning{0.0};
-    uint32_t episode_max{0};
-    uint32_t episode{0};
-} TrainConfig;
-
 /*
  * FUNCTIONS
  */

diff --git a/include/caravan/core/training.h b/include/caravan/core/training.h
@@ -0,0 +1,37 @@
+// Copyright (c) 2022-2024 r3w0p
+// The following code can be redistributed and/or
+// modified under the terms of the GPL-3.0 License.
+
+#ifndef CARAVAN_CORE_TRAINING_H
+#define CARAVAN_CORE_TRAINING_H
+
+#include <array>
+#include <cstdint>
+#include <map>
+#include <string>
+#include <vector>
+
+// Number of uint8_t slots in an encoded game state.
+// TODO still a placeholder: the layout sketched in
+//  UserBotTrain::get_game_state() adds up to 201 slots.
+const uint16_t SIZE_GAME_STATE = 1;
+
+// Fixed-size encoding of a game state.
+typedef std::array<uint8_t, SIZE_GAME_STATE> GameState;
+
+// For each game state, one value per string key (presumably the move).
+// NOTE(review): uint8_t cannot hold the -1 reward planned in
+//  UserBotTrain::make_move_train() - confirm the value type.
+typedef std::map<GameState, std::map<std::string, uint8_t>> QTable;
+
+// Parameters of a training run.
+// discount/explore/learning: presumably the Q-learning discount factor,
+//  exploration rate and learning rate - TODO confirm.
+typedef struct TrainConfig {
+    float discount{0.0};
+    float explore{0.0};
+    float learning{0.0};
+    uint32_t episode_max{0};  // last episode to run (inclusive)
+    uint32_t episode{0};      // current episode
+} TrainConfig;
+
+#endif //CARAVAN_CORE_TRAINING_H
diff --git a/include/caravan/user/bot/ai.h b/include/caravan/user/bot/ai.h
@@ -8,15 +8,10 @@
 #include "caravan/user/user.h"
 
+// Bot user that is meant to pick its moves from a trained q_table; the
+// implementation is still a placeholder (see the TODOs in ai.cpp).
 class UserBotAI : public UserBot {
-protected:
-    int policy;  // TODO typedef array
-
 public:
     explicit UserBotAI(PlayerName pn);
 
     std::string request_move(Game *game) override;
-
-    void get_policy(); // TODO return policy
 };
 
 #endif //CARAVAN_USER_BOT_AI_H
diff --git a/include/caravan/user/bot/ai_train.h → include/caravan/user/bot/train.h b/include/caravan/user/bot/ai_train.h → include/caravan/user/bot/train.h
@@ -2,17 +2,21 @@
 // The following code can be redistributed and/or
 // modified under the terms of the GPL-3.0 License.
 
-#ifndef CARAVAN_USER_BOT_AI_TRAIN_H
-#define CARAVAN_USER_BOT_AI_TRAIN_H
+#ifndef CARAVAN_USER_BOT_TRAIN_H
+#define CARAVAN_USER_BOT_TRAIN_H
 
 #include "caravan/user/bot/ai.h"
 #include "caravan/core/training.h"
 
-class UserBotAITrain : public UserBotAI {
+// Training bot: a single instance makes the moves for both players
+// (see "Single bot plays as both players" in src/caravan/train.cpp).
+class UserBotTrain : public UserBot {
+protected:
+    // Values learned so far, keyed by game state and then by a string
+    // (presumably the move).
+    QTable q_table;
+
+    // Meant to encode the game for player `pname`; not implemented yet.
+    GameState get_game_state(Game *game, PlayerName pname);
 public:
-    explicit UserBotAITrain(PlayerName pn);
+    explicit UserBotTrain();
 
+    // Currently a stub that always returns "D1"; throws
+    // CaravanFatalException if the bot is closed.
     std::string make_move_train(Game *game, TrainConfig *tc);
 };
 
-#endif //CARAVAN_USER_BOT_AI_TRAIN_H
+#endif //CARAVAN_USER_BOT_TRAIN_H
diff --git a/src/caravan/train.cpp b/src/caravan/train.cpp
@@ -8,15 +8,14 @@
 #include <vector>
 #include <algorithm>
 #include "cxxopts.hpp"
-#include "caravan/user/bot/ai_train.h"
+#include "caravan/user/bot/train.h"
 
 const uint8_t FIRST_ABC = 1;
 const uint8_t FIRST_DEF = 2;
 
 int main(int argc, char *argv[]) {
-    UserBotAITrain *user_abc;
-    UserBotAITrain *user_def;
-    UserBotAITrain *user_turn;
+    UserBotTrain *user_train;
+    PlayerName player_turn;
     Game *game;
     GameConfig gc;
     TrainConfig tc;
@@ -51,14 +50,13 @@ int main(int argc, char *argv[]) {
             .episode = 1
         };
 
-        user_abc = new UserBotAITrain(PLAYER_ABC);
-        user_def = new UserBotAITrain(PLAYER_DEF);
+        // Single bot plays as both players and is trained on both.
+        user_train = new UserBotTrain();
 
         for(; tc.episode <= tc.episode_max; tc.episode++) {
             // Random first player
             rand_first = distr_first(gen);
             gc.player_first = rand_first == FIRST_ABC ? PLAYER_ABC : PLAYER_DEF;
-            user_turn = rand_first == FIRST_ABC ? user_abc : user_def;
 
             // Set training parameters
             tc.discount = discount;
@@ -70,26 +68,9 @@ int main(int argc, char *argv[]) {
 
             // Take turns until a winner is declared
-            while(game->get_winner() != NO_PLAYER) {
+            while(game->get_winner() == NO_PLAYER) {
-                // TODO borrow logic from other bot to determine if move is
-                //  valid or not; use this to narrow down possible moves that
-                //  can be made per game state; any issues with move when
-                //  passing to game should result in fatal exception
-                user_turn->make_move_train(game, &tc);
-
-                // TODO convert string move to command: take functions to do
-                //  this out of view tui; have single function to make this
-                //  conversion and then pass it to the game
-
-                if(user_turn->get_name() == PLAYER_ABC) {
-                    user_turn = user_def;
-                } else {
-                    user_turn = user_abc;
-                }
+                user_train->make_move_train(game, &tc);
             }
 
-            // TODO reward winner here?
-            //  (or maybe bots should figure that out for themselves...)
-
             // Finish game
             game->close();
             delete game;

diff --git a/src/caravan/user/bot/ai.cpp b/src/caravan/user/bot/ai.cpp
@@ -5,11 +5,11 @@
 #include "caravan/user/bot/ai.h"
 
+// Builds an AI bot for player `pn`; no trained data is loaded yet.
 UserBotAI::UserBotAI(PlayerName pn) : UserBot(pn) {
-    // TODO set trained policy
+    // TODO load trained q_table
 }
 
+// Returns the bot's move. Placeholder: `game` is not inspected and the
+// fixed move "D1" is always returned.
+// Throws CaravanFatalException if the bot has been closed.
 std::string UserBotAI::request_move(Game *game) {
     if (closed) { throw CaravanFatalException("Bot is closed."); }
 
-    return "D1";  // TODO
+    return "D1";  // TODO use q_table
 }
diff --git a/src/caravan/user/bot/ai_train.cpp b/src/caravan/user/bot/ai_train.cpp
diff --git a/src/caravan/user/bot/train.cpp b/src/caravan/user/bot/train.cpp
@@ -0,0 +1,57 @@
+// Copyright (c) 2022-2024 r3w0p
+// The following code can be redistributed and/or
+// modified under the terms of the GPL-3.0 License.
+
+#include <array>
+#include <map>
+#include "caravan/user/bot/train.h"
+#include "caravan/core/training.h"
+
+// The bot is not bound to one player (NO_PLAYER): it moves for both.
+UserBotTrain::UserBotTrain() : UserBot(NO_PLAYER), q_table() {
+    // The q-table starts out empty; training fills it in.
+}
+
+GameState UserBotTrain::get_game_state(Game *game, PlayerName pname) {
+    // Planned encoding (not implemented yet; all ints, 0 = empty slot):
+    //  caravans A to F: numerals 1-8, each with its attached face cards
+    //   {AH, KC, 0, 0} x 8
+    //  my hand: ordered list of cards in hand
+    //   {AC, 2D, 8S, 8S, JO, 0, 0, 0}
+    //  player indicator: 0 = player abc, 1 = player def
+    //
+    // total space needed for game state
+    // 32 per caravan, 6 caravans = 192
+    // 8 for hand
+    // 1 for player
+    // total = 201
+
+    // TODO encode `game` from the perspective of `pname`.
+    // Until then return a zero-filled state: flowing off the end of a
+    // non-void function is undefined behaviour.
+    GameState state{};
+    return state;
+}
+
+std::string UserBotTrain::make_move_train(Game *game, TrainConfig *tc) {
+    // A closed bot must not be asked for moves.
+    if (closed) {
+        throw CaravanFatalException("Bot is closed.");
+    }
+
+    // Whose turn it is; not used yet, kept for the steps outlined below.
+    const PlayerName turn_player = game->get_player_turn();
+
+    // Outline of the training step still to be written:
+    //  1. read the game state via get_game_state(); if the q-table has
+    //     no entry for it, add one with every action set to 0
+    //  2. TODO choose an action
+    //     (only actions that the current hand makes available)
+    //  3. TODO perform action
+    //  4. TODO measure reward (1 = win, -1 = loss, 0 = neither)
+    //  5. TODO update q_table
+    //     if a winner: +1 for winning player, -1 for losing player
+
+    // Placeholder: always the same move until the outline is implemented.
+    return std::string("D1");  // TODO
+}