HW2: added run_agent.py, a script that loads and visualizes the trained agent's performance.
Roei Bahumi committed Aug 5, 2019
1 parent dde95f4 commit 8a31ef5
Showing 2 changed files with 164 additions and 0 deletions.
14 changes: 14 additions & 0 deletions hw2/README.md
@@ -14,3 +14,17 @@ Before doing anything, first replace `gym/envs/box2d/lunar_lander.py` with the p
The only file that you need to look at is `train_pg_f18.py`, which you will implement.

See the [HW2 PDF](http://rail.eecs.berkeley.edu/deeprlcourse/static/homeworks/hw2.pdf) for further instructions.


## Running trained agent
After running `train_pg_f18.py` with a specific setting (gym environment, hyperparameters), a new directory will
be added under `data`, named with the following scheme:
```
args.exp_name + '_' + args.env_name + '_' + time.strftime("%d-%m-%Y_%H-%M-%S")
```
Under this directory there are multiple trained agents (the exact number is set by the `n_experiments` parameter), each stored in its own sub-directory.
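Assuming three experiments were run, the resulting layout might look like the sketch below (directory names are illustrative; `params.json` and `vars.pkl` are the two files `run_agent.py` reads):
```
data/hc_b4000_r0.01_RoboschoolInvertedPendulum-v1_21-07-2019_08-42-10/
├── 1/                  # one trained agent
│   ├── params.json     # saved training configuration
│   └── vars.pkl        # pickled model variables
└── ...                 # one sub-directory per experiment
```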
To visualize (render) these agents' behavior, run the `run_agent.py` script with the agent's sub-directory and the number of episodes to render (the `-n` option). For example:
```bash
# Render 3 episodes of agent number 1 from this experiment
python run_agent.py "data/hc_b4000_r0.01_RoboschoolInvertedPendulum-v1_21-07-2019_08-42-10/1" -n 3
```
150 changes: 150 additions & 0 deletions hw2/run_agent.py
@@ -0,0 +1,150 @@
"""
## Running trained agent
After running `train_pg_f18.py` with a specific setting (gym environment, metaprameters) a new directory will
be added under `data` with the following structure:
args.exp_name + '_' + args.env_name + '_' + time.strftime("%d-%m-%Y_%H-%M-%S")
Under this directory, there are multiple (exact number is set by 'n_experiments' param) trained agents.
In order to visualize (render) these agents behavior, run the `run_agent.py` script and specify the number of iterations (-n option). For example:
> python run_agent.py "data/hc_b4000_r0.01_RoboschoolInvertedPendulum-v1_21-07-2019_08-42-10/1" -n 3
"""
import os
import json
import pickle
import gym
import numpy as np
import tensorflow as tf
from train_pg_f18 import Agent


PARAMS_FILE = "params.json"
VARS_FILE = "vars.pkl"


def load_params(filename):
"""
Load the 'params.json' file.
A simple json.loads() call does not work here because the file was saved with a special separators.
:param filename: str
:return: dict
"""
    with open(filename, 'r') as file:
        data = file.read().replace(',\n', ',').replace('\t:\t', ':').replace('\n', '')

    return json.loads(data)


def load_pickle(filename, mode='rb'):
    with open(filename, mode=mode) as f:
        return pickle.load(f)


def load_agent_and_env(model_dir):
"""
Load an agent with its pre-trained model and the relevant environment
Most of the code here is taken from train_pg_f18.py::train_PG() function
:param model_dir: str (full directory path to the 'params.json' and 'vars.pkl' files)
:return: tuple (a tuple of length 2, the Agent instance and the gym env object)
"""
    # Load the params json
    params_file = os.path.join(model_dir, PARAMS_FILE)
    params = load_params(filename=params_file)
    print(params)

    # Load the model variables
    vars_filename = os.path.join(model_dir, VARS_FILE)
    model_vars = load_pickle(filename=vars_filename)
    # print(model_vars)

    # Make the gym environment
    env = gym.make(params['env_name'])

    # Set random seeds
    seed = params['seed']
    tf.set_random_seed(seed)
    np.random.seed(seed)
    # env.seed(seed)

    # Is this env continuous or discrete?
    discrete = isinstance(env.action_space, gym.spaces.Discrete)

    # Observation and action sizes
    ob_dim = env.observation_space.shape[0]
    ac_dim = env.action_space.n if discrete else env.action_space.shape[0]

    # ========================================================================================#
    # Initialize Agent
    # ========================================================================================#
    computation_graph_args = {
        'n_layers': params['n_layers'],
        'ob_dim': ob_dim,
        'ac_dim': ac_dim,
        'discrete': discrete,
        'size': params['size'],
        'learning_rate': params['learning_rate'],
    }

    sample_trajectory_args = {
        'animate': params['animate'],
        'max_path_length': params['max_path_length'],
        'min_timesteps_per_batch': params['min_timesteps_per_batch'],
    }

    estimate_return_args = {
        'gamma': params['gamma'],
        'reward_to_go': params['reward_to_go'],
        'nn_baseline': params['nn_baseline'],
        'normalize_advantages': params['normalize_advantages'],
    }

    agent = Agent(computation_graph_args, sample_trajectory_args, estimate_return_args)

    # build computation graph
    agent.build_computation_graph()

    # tensorflow: config, session, variable initialization
    agent.init_tf_sess()

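    # NOTE: 'vars.pkl' is assumed to map each global variable's name to its saved
    # numpy value (that is how model_vars is indexed below). Each assign() call adds
    # a new op to the graph, which is acceptable for this one-time restore.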
    # Override the graph variables with the pre-trained values
    for g_var in tf.global_variables(scope=None):
        # Get the saved value and assign it to the tensor
        value = model_vars[g_var.name]
        set_variable_op = g_var.assign(value)
        agent.sess.run(set_variable_op)

    # # Validate that the assignment was successful
    # for g_var in tf.global_variables(scope=None):
    #     assert np.array_equal(g_var.eval(), model_vars[g_var.name])

    return agent, env


if __name__ == "__main__":
"""
Example usage (after running train_pg_18.py and creating agent 'data' dirs):
- python run_agent.py "data/hc_b4000_r0.01_RoboschoolInvertedPendulum-v1_21-07-2019_08-42-10/1" -n 3
- python run_agent.py "data/ll_b40000_r0.005_LunarLanderContinuous-v2_21-07-2019_09-59-05/1" -n 3
- python run_agent.py "data/hc_b50000_r0.005_RoboschoolHalfCheetah-v1_22-07-2019_20-04-48/1" -n 3
"""
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('model_dir', type=str, help='A relative path to the data dir of a specific experiment. For eample: "data/ll_b40000_r0.005_LunarLanderContinuous-v2_21-07-2019_09-59-05/1"')
parser.add_argument('--n_iter', '-n', type=int, default=3)
args = parser.parse_args()

    # Load an agent with its pre-trained model and the relevant environment
    model_dir = args.model_dir
    agent, env = load_agent_and_env(model_dir)

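    # Note: passing animate_this_episode=True presumably forces rendering for the
    # episode regardless of the 'animate' value saved in params.json
    # (see sample_trajectory() in train_pg_f18.py).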
    # Run n_iter episodes with the loaded agent, rendering each one
    for i in range(args.n_iter):
        agent.sample_trajectory(env, animate_this_episode=True)
    print("done")
