PWhiddy · Baekalfen · Oct 9, 2023 · Oct 9, 2023 · Oct 10, 2023
diff --git a/baselines/red_gym_env.py b/baselines/red_gym_env.py
@@ -73,6 +73,10 @@ def __init__(
             WindowEvent.RELEASE_BUTTON_B
         ]
 
+        # This would be optimal, but CnnPolicy warns about needing at least 36x36 and we get a runtime error in update_frame_knn_index
+        # The tilemap output is 160x144 pixels divided by 8px tiles: 20x18 tiles (inverted for row-major).
+        # We are not actually storing color data, but rather just a tile index, which is seemingly unique for a specific tile (ask on Discord for the long explanation).
+        # self.output_shape = (18, 20, 3)
         self.output_shape = (36, 40, 3)
         self.mem_padding = 2
         self.memory_height = 8
@@ -85,17 +89,22 @@ def __init__(
 
         # Set these in ALL subclasses
         self.action_space = spaces.Discrete(len(self.valid_actions))
+        # TODO: This may need to be changed, as we are now using larger numbers (unsure).
         self.observation_space = spaces.Box(low=0, high=255, shape=self.output_full, dtype=np.uint8)
 
-        head = 'headless' if config['headless'] else 'SDL2'
+        # Unlike 'headless' or 'SDL2', 'dummy' does not render anything to the screen, which saves *a lot* of compute time (10x).
+        # This is also why we need to use the tilemaps, but tilemaps are also more accurate.
+        head = 'dummy' if config['headless'] else 'SDL2'
 
         self.pyboy = PyBoy(
                 config['gb_path'],
                 debugging=False,
                 disable_input=False,
                 window_type=head,
                 hide_window='--quiet' in sys.argv,
+                game_wrapper=True,
             )
+        self.game_wrapper = self.pyboy.game_wrapper()
 
         self.screen = self.pyboy.botsupport_manager().screen()
 
@@ -151,10 +160,12 @@ def init_knn(self):
             max_elements=self.num_elements, ef_construction=100, M=16)
 
     def render(self, reduce_res=True, add_memory=True, update_mem=True):
-        game_pixels_render = self.screen.screen_ndarray() # (144, 160, 3)
         if reduce_res:
-            game_pixels_render = (255*resize(game_pixels_render, self.output_shape)).astype(np.uint8)
             if update_mem:
+                game_pixels_render = np.zeros(self.output_shape, dtype=np.uint8)
+                # WARN: We only fill some of the frame buffer, as we cannot make self.output_shape smaller without crashing,
+                # but the game area isn't any larger. I hope the RL library will ignore the blank space.
+                game_pixels_render[:18,:20, :] = np.asarray(self.game_wrapper.game_area(), dtype=np.uint32).view(np.uint8).reshape(18, 20, 4)[:,:,:3]
                 self.recent_frames[0] = game_pixels_render
             if add_memory:
                 pad = np.zeros(
@@ -169,8 +180,11 @@ def render(self, reduce_res=True, add_memory=True, update_mem=True):
                         rearrange(self.recent_frames, 'f h w c -> (f h) w c')
                     ),
                     axis=0)
+        else:
+            # WARN: This is blank when using 'dummy'. Maybe change to 'headless' when you need to visualize.
+            game_pixels_render = self.screen.screen_ndarray() # (144, 160, 3)
         return game_pixels_render
-    
+
     def step(self, action):
 
         self.run_action_on_emulator(action)
@@ -392,9 +406,9 @@ def get_levels_reward(self):
             scaled = (level_sum-explore_thresh) / scale_factor + explore_thresh
         self.max_level_rew = max(self.max_level_rew, scaled)
         return self.max_level_rew
-    
+
     def get_knn_reward(self):
-        pre_rew = 0.004
+        pre_rew = 0.004 # unsure whether this needs to be recalibrated for the change to tilemaps
         post_rew = 0.01
         cur_size = self.knn_index.get_current_count()
         base = (self.base_explore if self.levels_satisfied else cur_size) * pre_rew
@@ -486,6 +500,8 @@ def update_max_event_rew(self):
     def read_hp_fraction(self):
         hp_sum = sum([self.read_hp(add) for add in [0xD16C, 0xD198, 0xD1C4, 0xD1F0, 0xD21C, 0xD248]])
         max_hp_sum = sum([self.read_hp(add) for add in [0xD18D, 0xD1B9, 0xD1E5, 0xD211, 0xD23D, 0xD269]])
+        if max_hp_sum == 0:
+            return 0 # Avoid zero-division
         return hp_sum / max_hp_sum
 
     def read_hp(self, start):

diff --git a/baselines/run_baseline.py b/baselines/run_baseline.py
@@ -12,10 +12,10 @@
 updates_per_checkpoint = 4
 
 env_config = {
-                'headless': True, 'save_final_state': True, 'early_stop': False, 
+                'headless': True, 'save_final_state': True, 'early_stop': False,
                 'action_freq': 24, 'init_state': '../fast_text_start.state', 'max_steps': run_steps,
-                'print_rewards': True, 'save_video': True, 'session_path': sess_path,
-                'gb_path': '../PokemonRed.gb', 'debug': False, 'sim_frame_dist': 2_000_000.0
+                'print_rewards': True, 'save_video': False, 'fast_video': True, 'session_path': sess_path,
+                'gb_path': '../PokemonRed.gb', 'debug': False, 'sim_frame_dist': 2_000_000.0,
             }
 
 env = RedGymEnv(config=env_config)
@@ -24,7 +24,7 @@
 
 learn_steps = 40
 file_name = 'poke_' #'best_12-7/poke_12_b'
-inference_only = True 
+inference_only = True
 
 if exists(file_name + '.zip'):
     print('\nloading checkpoint')