Merge branch main

wangxt111 · wangxt111 · commit 083694edd643 · 2025-01-27T23:11:49.000+08:00
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,4 @@
 **/__pycache__/
 .DS_Store
+ghost.pth
+pacman.pth
diff --git a/.gitmodules b/.gitmodules
@@ -1,3 +1,3 @@
 [submodule "core"]
 	path = core
-	url = git@github.com:PacMan-Logic/Logic-core.git
+	url = git@github.com:PacMan-Logic/Logic-core.git
diff --git a/ai.py b/ai.py
@@ -277,8 +277,7 @@ def choose_move(self, game_state: GameState):
 
         return [Direction.STAY.value]  # 默认停留
 
-
 # TODO: 你需要实现一个ai函数
 
-ai_func = PacmanAI().choose_move  # TODO: 你需要把ai_func替换为自己的ai函数
+ai_func = PacmanAI().choose_move
 __all__ = ["ai_func"]
diff --git a/ai_rl.py b/ai_rl.py
@@ -0,0 +1,36 @@
+from core.gamedata import GameState
+from core.GymEnvironment import PacmanEnv
+from model import *
+from train import state_dict_to_tensor
+
+# Default torch device for this module: the GPU when CUDA is available,
+# otherwise the CPU.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+class PacmanAI:
+    """Greedy Pac-Man agent backed by a trained ``PacmanNet``.
+
+    Instances are callable: given a ``GameState`` they return a one-element
+    list holding the index of the highest-scoring network output.
+    """
+
+    def __init__(self, device=device, model_path="pacman.pth"):
+        """Build the network and load its trained weights.
+
+        Args:
+            device: Torch device the network runs on. Defaults to the
+                module-level ``device``.
+            model_path: Checkpoint file holding the network's state dict.
+        """
+        self.device = device
+        # 4 input channels, 5 actions, 40 extra features.
+        self.pacman_net = PacmanNet(4, 5, 40)
+        # map_location remaps the stored tensors onto the chosen device, so a
+        # checkpoint saved on a GPU still loads on a CPU-only host.
+        state_dict = torch.load(model_path, map_location=self.device)
+        self.pacman_net.load_state_dict(state_dict)
+        self.pacman_net.to(self.device)
+        # Inference only: put the network into evaluation mode.
+        self.pacman_net.eval()
+
+    def __call__(self, game_state: GameState):
+        """Return ``[action]``, the argmax of the network output.
+
+        Args:
+            game_state: Current game state; converted to tensors through
+                ``gamestate_to_statedict`` and ``state_dict_to_tensor``.
+        """
+        state = game_state.gamestate_to_statedict()
+        state_tensor, extra = state_dict_to_tensor(state)
+        with torch.no_grad():
+            scores = self.pacman_net(
+                state_tensor.to(self.device), extra.to(self.device)
+            )
+        # .item() needs a single-element tensor, i.e. a batch of one state.
+        return [scores.argmax(1).item()]
+
+
+if __name__ == "__main__":
+    # Smoke test: query the agent once on a freshly reset environment and
+    # print the action list it returns.
+    agent = PacmanAI()
+    environment = PacmanEnv()
+    environment.reset()
+    print(agent(environment.game_state()))
diff --git a/core b/core
@@ -1 +1 @@
-Subproject commit 6fd23a655b07a9bb7a6361973beb1cfcd79c06be
+Subproject commit 46aade8fabdd1189b5526c156b550f309cced836
diff --git a/main.py b/main.py
@@ -1,11 +1,11 @@
-import sys
 import json
+import sys
 
-from ai_to_judger import pacman_to_judger
-from ai_to_judger import ghost_to_judger
 from ai import *
-from core.GymEnvironment import PacmanEnv
+from ai_rl import PacmanAI
+from ai_to_judger import ghost_to_judger, pacman_to_judger
 from core.gamedata import GameState
+from core.GymEnvironment import PacmanEnv
 from utils.utils import write_to_judger
 
 
@@ -27,6 +27,7 @@ def __init__(self):
         id = int(input())
         self.id = id
         self.level_change = True
+        self.eat_all_beans = False
 
     def run(self, ai):
         while 1:
@@ -40,7 +41,7 @@ def run(self, ai):
                 # 0号玩家发送信息
                 pacman_op(self.env, ai)
 
-                # 1号玩家发送信息
+                # 等待1号玩家发送信息
                 get_info = input()
                 print(f"receive info: {get_info}", file=sys.stderr)
 
@@ -50,13 +51,13 @@ def run(self, ai):
                 get_op_json = json.loads(get_op)
                 pacman_action = get_op_json["pacman_action"]
                 ghosts_action = get_op_json["ghosts_action"]
-                board, score, self.level_change = self.env.step(
+                info , pacman_reward , ghosts_reward , self.level_change , self.eat_all_beans = self.env.step(
                     pacman_action, ghosts_action
                 )
             else:
                 # 当前为1号玩家
 
-                # 0号玩家发送信息
+                # 等待0号玩家发送信息
                 get_info = input()
                 print(f"receive info: {get_info}", file=sys.stderr)
 
@@ -69,12 +70,13 @@ def run(self, ai):
                 get_op_json = json.loads(get_op)
                 pacman_action = get_op_json["pacman_action"]
                 ghosts_action = get_op_json["ghosts_action"]
-                board, score, self.level_change = self.env.step(
+                info , pacman_reward , ghosts_reward , self.level_change , self.eat_all_beans = self.env.step(
                     pacman_action, ghosts_action
                 )
 
 
 if __name__ == "__main__":
     print("init done", file=sys.stderr)
     controller = Controller()
+    # By default ai_func is the one exported by ai.py; to run the RL agent
+    # from ai_rl instead, uncomment the next line.
+    # ai_func = PacmanAI()
     controller.run(ai_func)
diff --git a/model.py b/model.py
@@ -0,0 +1,99 @@
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+# state-value network for pacman
+class PacmanNet(nn.Module):
+    """Convolutional network that scores each Pac-Man action for a board.
+
+    Every input channel is cast to integer indices and embedded separately;
+    the embeddings are stacked as feature maps, reduced by three convolutions
+    and fused with an encoded vector of extra features.
+
+    Args:
+        input_channel_num: Number of board channels read from ``x``.
+        num_actions: Size of the output vector.
+        extra_size: Length of the extra feature vector ``y``.
+    """
+
+    def __init__(self, input_channel_num, num_actions, extra_size):
+        super().__init__()
+        self.channels = input_channel_num
+        self.embeddings = nn.ModuleList(
+            [nn.Embedding(9, 16) for _ in range(input_channel_num)])
+        # Each channel contributes 16 embedding features, so conv1 takes
+        # 16 * input_channel_num maps (64 for 4 channels). A hard-coded 64
+        # only works when input_channel_num is exactly 4.
+        self.conv1 = nn.Conv2d(
+            16 * input_channel_num, 64, kernel_size=8, stride=4)
+        self.conv2 = nn.Conv2d(64, 64, kernel_size=4, stride=2)
+        self.bn = nn.BatchNorm2d(64)
+        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=2)
+
+        self.encoder = nn.Linear(extra_size, 64)
+
+        self.fc1 = nn.Linear(64, 32)
+        self.fc2 = nn.Linear(32, num_actions)
+
+    def forward(self, x, y):
+        """Return a ``(B, num_actions)`` tensor of action scores.
+
+        Args:
+            x: ``(B, C, H, W)`` board tensor; the first ``input_channel_num``
+                channels are cast to indices and must lie in ``[0, 9)``. The
+                spatial size must shrink to 1x1 after the three convolutions
+                (true for 38x38) so the flattened features line up with the
+                64-wide encoder output.
+            y: ``(B, extra_size)`` tensor of extra features.
+        """
+        B, _, H, W = x.shape
+        embedded_channels = []
+        for i in range(self.channels):
+            # reshape (not view) so non-contiguous inputs are accepted too.
+            flattened_channel = x[:, i, :, :].reshape(B, -1).long()
+            embedded_channel = self.embeddings[i](flattened_channel)
+            # NOTE(review): the embedding output is (B, H*W, 16); viewing it
+            # as (B, 16, H, W) reinterprets memory instead of moving the
+            # embedding axis to the channel position (a permute would do
+            # that). Left unchanged because a different layout would change
+            # what existing checkpoints compute; confirm the intent.
+            embedded_channel = embedded_channel.view(B, 16, H, W)
+            embedded_channels.append(embedded_channel)
+        # Concatenate along the channel dimension
+        x = torch.cat(embedded_channels, dim=1)
+
+        x = F.relu(self.conv1(x))
+        x = F.relu(self.conv2(x))
+        x = self.bn(x)
+        x = F.relu(self.conv3(x))
+        # Same function as F.sigmoid, which warns as deprecated on some
+        # PyTorch releases.
+        y = torch.sigmoid(self.encoder(y))
+        x = x.flatten(1)
+        # Fuse board features and extra features by element-wise addition.
+        x = F.relu(self.fc1(x + y))
+        return self.fc2(x)
+
+
+# state-value network for ghost
+class GhostNet(nn.Module):
+    """Convolutional network that scores actions for three ghosts at once.
+
+    Same trunk as ``PacmanNet``: per-channel embeddings, three convolutions
+    and an encoded extra-feature vector. The head emits ``3 * num_actions``
+    values reshaped to ``(B, 3, num_actions)``, presumably one row per
+    ghost.
+
+    Args:
+        input_channel_num: Number of board channels read from ``x``.
+        num_actions: Number of scores produced per row of the output.
+        extra_size: Length of the extra feature vector ``y``.
+    """
+
+    def __init__(self, input_channel_num, num_actions, extra_size):
+        super().__init__()
+        self.channels = input_channel_num
+        # Kept so forward() can shape the output without assuming 5 actions.
+        self.num_actions = num_actions
+        self.embeddings = nn.ModuleList(
+            [nn.Embedding(9, 16) for _ in range(input_channel_num)])
+
+        # Each channel contributes 16 embedding features, so conv1 takes
+        # 16 * input_channel_num maps (64 for 4 channels). A hard-coded 64
+        # only works when input_channel_num is exactly 4.
+        self.conv1 = nn.Conv2d(
+            16 * input_channel_num, 64, kernel_size=8, stride=4)
+        self.conv2 = nn.Conv2d(64, 64, kernel_size=4, stride=2)
+        self.bn = nn.BatchNorm2d(64)
+        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=2)
+
+        self.encoder = nn.Linear(extra_size, 64)
+
+        self.fc1 = nn.Linear(64, 32)
+        self.fc2 = nn.Linear(32, num_actions*3)
+
+    def forward(self, x, y):
+        """Return a ``(B, 3, num_actions)`` tensor of action scores.
+
+        Args:
+            x: ``(B, C, H, W)`` board tensor; the first ``input_channel_num``
+                channels are cast to indices and must lie in ``[0, 9)``. The
+                spatial size must shrink to 1x1 after the three convolutions
+                (true for 38x38) so the flattened features line up with the
+                64-wide encoder output.
+            y: ``(B, extra_size)`` tensor of extra features.
+        """
+        B, _, H, W = x.shape
+        embedded_channels = []
+        for i in range(self.channels):
+            # reshape (not view) so non-contiguous inputs are accepted too.
+            flattened_channel = x[:, i, :, :].reshape(B, -1).long()
+            embedded_channel = self.embeddings[i](flattened_channel)
+            # NOTE(review): the embedding output is (B, H*W, 16); viewing it
+            # as (B, 16, H, W) reinterprets memory instead of moving the
+            # embedding axis to the channel position (a permute would do
+            # that). Left unchanged because a different layout would change
+            # what existing checkpoints compute; confirm the intent.
+            embedded_channel = embedded_channel.view(B, 16, H, W)
+            embedded_channels.append(embedded_channel)
+        # Concatenate along the channel dimension
+        x = torch.cat(embedded_channels, dim=1)
+        x = F.relu(self.conv1(x))
+        x = F.relu(self.conv2(x))
+        x = self.bn(x)
+        x = F.relu(self.conv3(x))
+
+        # Same function as F.sigmoid, which warns as deprecated on some
+        # PyTorch releases.
+        y = torch.sigmoid(self.encoder(y))
+
+        x = x.flatten(1)
+        # Fuse board features and extra features by element-wise addition.
+        x = F.relu(self.fc1(x + y))
+        # fc2 emits 3 * num_actions values; a fixed (-1, 3, 5) view would
+        # fail or mis-shape the output whenever num_actions is not 5.
+        return self.fc2(x).view(-1, 3, self.num_actions)
+
+
+# test the shape of the output
+if __name__ == "__main__":
+    # Shape check: push one random 38x38 board with 10 extra features
+    # through each network in turn and print the output shape.
+    board = torch.rand(1, 4, 38, 38)
+    extras = torch.rand(1, 10)
+    for net_cls in (PacmanNet, GhostNet):
+        print(net_cls(4, 5, 10)(board, extras).shape)
diff --git a/train.py b/train.py

-Original file line number
+Diff line change
 [submodule "core"]
 	path = core
 -	url = git@github.com:PacMan-Logic/Logic-core.git
 +	url = git@github.com:PacMan-Logic/Logic-core.git