diff --git a/baselines/red_gym_env.py b/baselines/red_gym_env.py index 6371f781..3dedae91 100644 --- a/baselines/red_gym_env.py +++ b/baselines/red_gym_env.py @@ -73,6 +73,10 @@ def __init__( WindowEvent.RELEASE_BUTTON_B ] + # This would be optimal, but CnnPolicy warns about at least 36x36 and we get a runtime error in update_frame_knn_index + # The tilemap output is 160x144 pixels divided by 8px tiles: 20x18 tiles (inverted for row-major). + # We are not actually storing color data, but rather just a tile index, which is seemingly unique for a specific tile (ask on Discord for the long explanation). + # self.output_shape = (18, 20, 3) self.output_shape = (36, 40, 3) self.mem_padding = 2 self.memory_height = 8 @@ -85,9 +89,12 @@ def __init__( # Set these in ALL subclasses self.action_space = spaces.Discrete(len(self.valid_actions)) + # TODO: Maybe this needs to be changed as we are using larger numbers idk? self.observation_space = spaces.Box(low=0, high=255, shape=self.output_full, dtype=np.uint8) - head = 'headless' if config['headless'] else 'SDL2' + # 'dummy' does not render anything to the screen like 'headless' or 'SDL2' does, which saves *a lot* of compute time (10x). + # This is also why we need to use the tilemaps, but tilemaps are also more accurate. 
+ head = 'dummy' if config['headless'] else 'SDL2' self.pyboy = PyBoy( config['gb_path'], @@ -95,7 +102,9 @@ def __init__( disable_input=False, window_type=head, hide_window='--quiet' in sys.argv, + game_wrapper=True, ) + self.game_wrapper = self.pyboy.game_wrapper() self.screen = self.pyboy.botsupport_manager().screen() @@ -151,10 +160,12 @@ def init_knn(self): max_elements=self.num_elements, ef_construction=100, M=16) def render(self, reduce_res=True, add_memory=True, update_mem=True): - game_pixels_render = self.screen.screen_ndarray() # (144, 160, 3) if reduce_res: - game_pixels_render = (255*resize(game_pixels_render, self.output_shape)).astype(np.uint8) if update_mem: + game_pixels_render = np.zeros(self.output_shape, dtype=np.uint8) + # WARN: We only fill some of the frame buffer, as we cannot make self.output_shape smaller without crashing, + # but the game area isn't any larger. I hope the RL library will ignore the blank space. + game_pixels_render[:18,:20, :] = np.asarray(self.game_wrapper.game_area(), dtype=np.uint32).view(np.uint8).reshape(18, 20, 4)[:,:,:3] self.recent_frames[0] = game_pixels_render if add_memory: pad = np.zeros( @@ -169,8 +180,11 @@ def render(self, reduce_res=True, add_memory=True, update_mem=True): rearrange(self.recent_frames, 'f h w c -> (f h) w c') ), axis=0) + else: + # WARN: This is blank when using 'dummy'. Maybe change to headless when you need to visualize. 
+ game_pixels_render = self.screen.screen_ndarray() # (144, 160, 3) return game_pixels_render - + def step(self, action): self.run_action_on_emulator(action) @@ -392,9 +406,9 @@ def get_levels_reward(self): scaled = (level_sum-explore_thresh) / scale_factor + explore_thresh self.max_level_rew = max(self.max_level_rew, scaled) return self.max_level_rew - + def get_knn_reward(self): - pre_rew = 0.004 + pre_rew = 0.004 # idk if something needs to be recalibrated for the change to tilemaps post_rew = 0.01 cur_size = self.knn_index.get_current_count() base = (self.base_explore if self.levels_satisfied else cur_size) * pre_rew @@ -486,6 +500,8 @@ def update_max_event_rew(self): def read_hp_fraction(self): hp_sum = sum([self.read_hp(add) for add in [0xD16C, 0xD198, 0xD1C4, 0xD1F0, 0xD21C, 0xD248]]) max_hp_sum = sum([self.read_hp(add) for add in [0xD18D, 0xD1B9, 0xD1E5, 0xD211, 0xD23D, 0xD269]]) + if max_hp_sum == 0: + return 0 # Avoid zero-division return hp_sum / max_hp_sum def read_hp(self, start): diff --git a/baselines/run_baseline.py b/baselines/run_baseline.py index df98411a..cdd27e42 100644 --- a/baselines/run_baseline.py +++ b/baselines/run_baseline.py @@ -12,10 +12,10 @@ updates_per_checkpoint = 4 env_config = { - 'headless': True, 'save_final_state': True, 'early_stop': False, + 'headless': True, 'save_final_state': True, 'early_stop': False, 'action_freq': 24, 'init_state': '../fast_text_start.state', 'max_steps': run_steps, - 'print_rewards': True, 'save_video': True, 'session_path': sess_path, - 'gb_path': '../PokemonRed.gb', 'debug': False, 'sim_frame_dist': 2_000_000.0 + 'print_rewards': True, 'save_video': False, 'fast_video': True, 'session_path': sess_path, + 'gb_path': '../PokemonRed.gb', 'debug': False, 'sim_frame_dist': 2_000_000.0, } env = RedGymEnv(config=env_config) @@ -24,7 +24,7 @@ learn_steps = 40 file_name = 'poke_' #'best_12-7/poke_12_b' -inference_only = True +inference_only = True if exists(file_name + '.zip'): print('\nloading checkpoint')