
MPC GPS #81

Open

wants to merge 52 commits into base: master

Commits (52)
b23475e
Hinge L2 loss for obstacle cost. Experiment on Box2D point_mass.
thobotics Dec 30, 2016
536f7fa
Compute mu, sigma of the marginal distribution p(x) and test its gradient
thobotics Jan 7, 2017
f3cdf6a
test_box2d custom sample
thobotics Jan 7, 2017
79d2def
Merge branch 'mpc_gps' of https://github.com/thobotics/gps into mpc_gps
thobotics Jan 7, 2017
448aad6
Surrogate cost, regularized for PD Quu, no line search
thobotics Jan 8, 2017
50d4b99
Using MPC trajectory distribution with surrogate cost.
thobotics Jan 11, 2017
7319ee5
Real MPC (online optimization) following the offline trajectory.
thobotics Jan 12, 2017
c69e0bf
Using MPC when sampling to train the trajectory distribution. Tested on Box…
thobotics Jan 13, 2017
6e89046
Small fix: Added check for using mpc in algorithm.py
thobotics Jan 13, 2017
cf87743
Train/update MPC using the previous sample to evaluate cost.
thobotics Jan 14, 2017
710b3f6
Fix pointmass returning NaN for the closest obstacle point, which made NaN …
thobotics Jan 27, 2017
00f7d73
It works for both Arm and Pointmass by solving the unconstrained MPC problem.
thobotics Jan 28, 2017
1beecb8
Add DESIGN_DOC for mpc_gps trajectory optimization
thobotics Jan 31, 2017
2e9b22f
Update document for fitting dynamics from sample
thobotics Feb 9, 2017
41d440a
Merge branch 'master' of https://github.com/cbfinn/gps
thobotics Feb 11, 2017
70b768b
Update README.md
thobotics Feb 12, 2017
4282056
Update formula of Sigma in gmm
thobotics Feb 12, 2017
f8644ef
Merge branch 'master' of https://github.com/thobotics/gps
thobotics Feb 12, 2017
93e2645
Update DESIGN_DOC
thobotics Feb 12, 2017
f6df92f
Separate Pointmass non-obstacle and obstacle worlds
thobotics Feb 16, 2017
2e9b925
Create a point mass obstacle world and test with BADMM.
thobotics Feb 18, 2017
1eebe77
Check use_mpc key exists in hyperparams
thobotics Feb 23, 2017
5c6e3b8
First commit for training mobile robot using GPS.
thobotics Mar 10, 2017
043ca6d
Small fix for cost_translation_table_ caused Seg fault
thobotics Mar 11, 2017
5ef44a8
Training works when using an omnidirectional mobile robot.
thobotics Mar 18, 2017
f7cbde1
** Completed BADMM MPC_GPS surrogate cost. Tested by box2d_pointmass_b…
thobotics Mar 23, 2017
bba5400
Experiment on turtlebot_badmm one obstacle world.
thobotics Mar 23, 2017
0c05978
Use 1d raw laser range sensor as observation source.
thobotics Mar 25, 2017
e8bbd30
** Successful training (2 initial states) and testing (check note git status…
thobotics Mar 25, 2017
556c48c
Update video for testing phase
thobotics Mar 28, 2017
c68f6be
Update video for testing phase
thobotics Mar 28, 2017
7270a21
** DIFF mobile robot moves in hallway
thobotics Apr 1, 2017
d2cb11d
Merge branch 'mpc_gps' of https://github.com/thobotics/gps into mpc_gps
thobotics Apr 1, 2017
1064913
Update link for turtlebot (differential drive) move in hallway.
thobotics Apr 1, 2017
feac983
Clean up
thobotics Apr 4, 2017
dfc921f
Merge branch 'mpc_gps' of https://github.com/thobotics/gps into mpc_gps
thobotics Apr 4, 2017
2fdb3be
Remove junk file
thobotics Apr 4, 2017
4abf703
Merge branch 'mpc_gps' and edit README for pull request.
thobotics Apr 4, 2017
06ba91e
Reset cost_state
thobotics Apr 4, 2017
e910769
Merge branch 'master' of https://github.com/cbfinn/gps
thobotics Apr 4, 2017
d92d2f9
Update cost obstacle config
thobotics Apr 5, 2017
ea71a69
Add reference to MPC GPS
thobotics Apr 5, 2017
3de3115
Remove junk worlds and maps of turtlebot
thobotics Apr 5, 2017
4f8722d
Restore pr2_gazebo launch file
thobotics Apr 5, 2017
0a95108
Update camera sensor
thobotics Apr 6, 2017
df218b2
Change weight of cost_obstacle and initial state.
thobotics Apr 7, 2017
67b4716
Show iLQG and MPC Plan on RVIZ
thobotics May 31, 2017
c1f28b8
Merge branch 'master' of https://github.com/cbfinn/gps
thobotics Jan 31, 2018
aec8993
Publish nearest obstacle points
thobotics Feb 27, 2018
d7f2707
Agent turtlebot mpepc.
thobotics Feb 27, 2018
ff9e82f
Update README.md
thobotics Feb 9, 2021
dcb7ef5
Update README.md
thobotics Feb 9, 2021
16 changes: 16 additions & 0 deletions README.md
@@ -1,6 +1,22 @@
GPS
======

Please check the mpc_gps branch for the reimplementation of the MPC-GPS paper!

## Demo
1. Hallway
```
https://youtu.be/4h49wDTnrxw?list=PLeH0h5k_xzT5Rxuqkb2ZHuNJTIqAc76qN
```

2. 3 obstacle test
```
https://youtu.be/xaNUwspDp2w
```
---

======

This code is a reimplementation of the guided policy search algorithm and LQG-based trajectory optimization, meant to help others understand, reuse, and build upon existing work.

For full documentation, see [rll.berkeley.edu/gps](http://rll.berkeley.edu/gps).
19 changes: 18 additions & 1 deletion experiments/box2d_arm_example/hyperparams.py
@@ -37,6 +37,7 @@
'data_files_dir': EXP_DIR + 'data_files/',
'log_filename': EXP_DIR + 'log.txt',
'conditions': 1,
'use_mpc': True,
}

if not os.path.exists(common['data_files_dir']):
@@ -46,7 +47,7 @@
'type': AgentBox2D,
'target_state' : np.array([0, 0]),
'world' : ArmWorld,
- 'render' : True,
+ 'render' : False,
'x0': np.array([0.75*np.pi, 0.5*np.pi, 0, 0, 0, 0, 0]),
'rk': 0,
'dt': 0.05,
@@ -55,14 +56,20 @@
'pos_body_idx': np.array([]),
'pos_body_offset': np.array([]),
'T': 100,
'use_mpc': common['use_mpc'],
'M': 5,
'sensor_dims': SENSOR_DIMS,
'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS],
'obs_include': [],
}

#if common['use_mpc']:
# agent['smooth_noise_var'] = 0.3

algorithm = {
'type': AlgorithmTrajOpt,
'conditions': common['conditions'],
'use_mpc': common['use_mpc'],
}

algorithm['init_traj_distr'] = {
@@ -75,6 +82,16 @@
'T': agent['T'],
}

algorithm['init_mpc'] = {
'type': init_lqr,
'init_gains': np.zeros(SENSOR_DIMS[ACTION]),
'init_acc': np.zeros(SENSOR_DIMS[ACTION]),
'init_var': 0.1,
'stiffness': 0.01,
'dt': agent['dt'],
'T': agent['M'],
}

action_cost = {
'type': CostAction,
'wu': np.array([1, 1])
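The hyperparameters above introduce an MPC horizon `M` alongside the episode length `T`, plus an `init_mpc` distribution over that short horizon. As a rough illustration only (all function names below are hypothetical, not from this PR's code), a receding-horizon controller re-plans an M-step sequence at every step of the T-step episode but executes just the first action:

```python
import numpy as np

def mpc_rollout(dynamics_step, plan_short_horizon, x0, T=100, M=5):
    """Receding-horizon sketch: at every step, re-plan over the next
    M steps but execute only the first planned action."""
    x, states, actions = x0, [x0], []
    for t in range(T):
        u_seq = plan_short_horizon(x, M)   # length-M plan from current state
        u = u_seq[0]                       # execute only the first action
        x = dynamics_step(x, u)            # advance the true system
        states.append(x)
        actions.append(u)
    return np.array(states), np.array(actions)

# Toy double-integrator stand-in with a naive PD-style "planner".
def dyn(x, u):
    pos, vel = x
    return np.array([pos + 0.05 * vel, vel + 0.05 * u])

def plan(x, M):
    return [-1.0 * x[0] - 0.5 * x[1]] * M  # constant plan over the horizon

states, actions = mpc_rollout(dyn, plan, np.array([1.0, 0.0]))
```

The point of the sketch is only the loop structure: `T` bounds the outer rollout while `M` bounds each inner plan, which is why `init_mpc` above uses `'T': agent['M']`.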
163 changes: 163 additions & 0 deletions experiments/box2d_pointmass_badmm_example/hyperparams.py
@@ -0,0 +1,163 @@
""" Hyperparameters for Box2d Point Mass."""
from __future__ import division

import os.path
from datetime import datetime
import numpy as np

from gps import __file__ as gps_filepath
from gps.agent.box2d.agent_box2d import AgentBox2D
from gps.agent.box2d.point_mass_world import PointMassWorld
from gps.algorithm.algorithm_badmm import AlgorithmBADMM
from gps.algorithm.cost.cost_state import CostState
from gps.algorithm.cost.cost_action import CostAction
from gps.algorithm.cost.cost_sum import CostSum
from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior
from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM
from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython
from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe
from gps.algorithm.policy.lin_gauss_init import init_pd
from gps.gui.config import generate_experiment_info
from gps.proto.gps_pb2 import END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, ACTION, POSITION_NEAREST_OBSTACLE

SENSOR_DIMS = {
END_EFFECTOR_POINTS: 3,
END_EFFECTOR_POINT_VELOCITIES: 3,
ACTION: 2
}

BASE_DIR = '/'.join(str.split(gps_filepath, '/')[:-2])
EXP_DIR = BASE_DIR + '/../experiments/box2d_pointmass_badmm_example/'


common = {
'experiment_name': 'box2d_pointmass_badmm_example' + '_' + \
datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'),
'experiment_dir': EXP_DIR,
'data_files_dir': EXP_DIR + 'data_files/',
'log_filename': EXP_DIR + 'log.txt',
'conditions': 4,
'use_mpc': True,
}

if not os.path.exists(common['data_files_dir']):
os.makedirs(common['data_files_dir'])

agent = {
'type': AgentBox2D,
'target_state' : np.array([5, 20, 0]),
"world" : PointMassWorld,
'render' : False,
'x0': [np.array([0, 5, 0, 0, 0, 0]),
np.array([0, 30, 0, 0, 0, 0]),
np.array([10, 5, 0, 0, 0, 0]),
np.array([10, 30, 0, 0, 0, 0]),
],
'rk': 0,
'dt': 0.05,
'substeps': 1,
'conditions': common['conditions'],
'pos_body_idx': np.array([]),
'pos_body_offset': np.array([]),
'T': 100,
'use_mpc': common['use_mpc'],
'M': 5,
'sensor_dims': SENSOR_DIMS,
'state_include': [END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES],
'obs_include': [END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES],
}

algorithm = {
'type': AlgorithmBADMM,
'conditions': common['conditions'],
'iterations': 10,
'lg_step_schedule': np.array([1e-4, 1e-3, 1e-2, 1e-2]),
'policy_dual_rate': 0.2,
'ent_reg_schedule': np.array([1e-3, 1e-3, 1e-2, 1e-1]),
'fixed_lg_step': 3,
'kl_step': 5.0,
'min_step_mult': 0.01,
'max_step_mult': 1.0,
'sample_decrease_var': 0.05,
'sample_increase_var': 0.1,
}

algorithm['init_traj_distr'] = {
'type': init_pd,
'init_var': 5.0,
'pos_gains': 0.0,
'dQ': SENSOR_DIMS[ACTION],
'dt': agent['dt'],
'T': agent['T'],
}

algorithm['init_mpc'] = {
'type': init_pd,
'init_var': 5.0,
'pos_gains': 0.0,
'dQ': SENSOR_DIMS[ACTION],
'dt': agent['dt'],
'T': agent['M'],
}

action_cost = {
'type': CostAction,
'wu': np.array([5e-5, 5e-5])
}

state_cost = {
'type': CostState,
'data_types' : {
END_EFFECTOR_POINTS: {
'wp': np.ones(SENSOR_DIMS[END_EFFECTOR_POINTS]),
'target_state': agent["target_state"],
},
},
}

algorithm['cost'] = {
'type': CostSum,
'costs': [action_cost, state_cost],
'weights': [1.0, 1.0],
}

algorithm['dynamics'] = {
'type': DynamicsLRPrior,
'regularization': 1e-6,
'prior': {
'type': DynamicsPriorGMM,
'max_clusters': 20,
'min_samples_per_cluster': 40,
'max_samples': 20,
},
}

algorithm['traj_opt'] = {
'type': TrajOptLQRPython,
}

algorithm['policy_opt'] = {
'type': PolicyOptCaffe,
'weights_file_prefix': EXP_DIR + 'policy',
}

algorithm['policy_prior'] = {
'type': PolicyPriorGMM,
'max_clusters': 20,
'min_samples_per_cluster': 40,
'max_samples': 20,
}

config = {
'iterations': 10,
'num_samples': 5,
'verbose_trials': 5,
'verbose_policy_trials': 0,
'common': common,
'agent': agent,
'gui_on': True,
'algorithm': algorithm,
}

common['info'] = generate_experiment_info(config)
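The `CostSum` block above combines a quadratic action cost (`wu`) and a state-tracking cost (`wp`, `target_state`) with per-cost weights. A minimal numpy sketch of that weighted combination, with illustrative helper names rather than the gps library's API:

```python
import numpy as np

def action_cost(u, wu):
    # 0.5 * sum_i wu_i * u_i^2 at each timestep
    return 0.5 * np.sum(wu * u**2, axis=-1)

def state_cost(x, wp, target):
    # 0.5 * sum_i wp_i * (x_i - target_i)^2 at each timestep
    return 0.5 * np.sum(wp * (x - target)**2, axis=-1)

def cost_sum(costs, weights):
    # Weighted sum of per-timestep cost terms, as in the CostSum config.
    return sum(w * c for w, c in zip(weights, costs))

T = 100
u = np.zeros((T, 2))                              # no control effort
x = np.tile(np.array([5.0, 20.0, 0.0]), (T, 1))   # already at target
total = cost_sum(
    [action_cost(u, np.array([5e-5, 5e-5])),
     state_cost(x, np.ones(3), np.array([5.0, 20.0, 0.0]))],
    [1.0, 1.0],
)
```

With the trajectory sitting at the target and zero controls, both terms vanish, so the total per-timestep cost is zero; the tiny `wu` in the config makes control effort nearly free relative to state error.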
21 changes: 19 additions & 2 deletions experiments/box2d_pointmass_example/hyperparams.py
@@ -9,14 +9,15 @@
from gps.agent.box2d.agent_box2d import AgentBox2D
from gps.agent.box2d.point_mass_world import PointMassWorld
from gps.algorithm.algorithm_traj_opt import AlgorithmTrajOpt
from gps.algorithm.cost.cost_obstacles import CostObstacle
from gps.algorithm.cost.cost_state import CostState
from gps.algorithm.cost.cost_action import CostAction
from gps.algorithm.cost.cost_sum import CostSum
from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior
from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython
from gps.algorithm.policy.lin_gauss_init import init_pd
- from gps.proto.gps_pb2 import END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, ACTION
+ from gps.proto.gps_pb2 import END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, ACTION, POSITION_NEAREST_OBSTACLE
from gps.gui.config import generate_experiment_info

SENSOR_DIMS = {
@@ -36,6 +37,7 @@
'target_filename': EXP_DIR + 'target.npz',
'log_filename': EXP_DIR + 'log.txt',
'conditions': 1,
'use_mpc': True,
}

if not os.path.exists(common['data_files_dir']):
@@ -46,22 +48,28 @@
'target_state' : np.array([5, 20, 0]),
"world" : PointMassWorld,
'render' : False,
- 'x0': np.array([0, 5, 0, 0, 0, 0]),
+ 'x0': [np.array([0, 5, 0, 0, 0, 0])],
'rk': 0,
'dt': 0.05,
'substeps': 1,
'conditions': common['conditions'],
'pos_body_idx': np.array([]),
'pos_body_offset': np.array([]),
'T': 100,
'use_mpc': common['use_mpc'],
'M': 5,
'sensor_dims': SENSOR_DIMS,
'state_include': [END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES],
'obs_include': [],
}

#if common['use_mpc']:
# agent['smooth_noise_var'] = 1.0

algorithm = {
'type': AlgorithmTrajOpt,
'conditions': common['conditions'],
'use_mpc': common['use_mpc'],
}

algorithm['init_traj_distr'] = {
@@ -73,6 +81,15 @@
'T': agent['T'],
}

algorithm['init_mpc'] = {
'type': init_pd,
'init_var': 5.0,
'pos_gains': 0.0,
'dQ': SENSOR_DIMS[ACTION],
'dt': agent['dt'],
'T': agent['M'],
}

action_cost = {
'type': CostAction,
'wu': np.array([5e-5, 5e-5])
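Several commits in this PR (e.g. "Check use_mpc key exists in hyperparams") guard against a missing or inconsistent `use_mpc` flag across the `common`, `agent`, and `algorithm` dicts. A small validation sketch under that assumption (the helper is hypothetical, not the PR's actual code):

```python
def check_mpc_config(common, agent, algorithm):
    """Validate MPC-related hyperparameters, defaulting use_mpc to
    False when the key is absent (mirroring the guard in this PR)."""
    use_mpc = common.get('use_mpc', False)
    if not use_mpc:
        return False
    # The flag is duplicated into agent and algorithm; they must agree.
    for name, cfg in (('agent', agent), ('algorithm', algorithm)):
        if cfg.get('use_mpc', False) != use_mpc:
            raise ValueError("%s['use_mpc'] disagrees with common" % name)
    # The MPC horizon M must be a valid sub-horizon of the episode.
    if 'M' not in agent or not (0 < agent['M'] <= agent['T']):
        raise ValueError("agent['M'] must be a horizon in (0, T]")
    # init_mpc plans over M steps, so its 'T' should equal agent['M'].
    if 'init_mpc' in algorithm and algorithm['init_mpc'].get('T') != agent['M']:
        raise ValueError("init_mpc horizon must equal agent['M']")
    return True

ok = check_mpc_config(
    {'use_mpc': True},
    {'use_mpc': True, 'M': 5, 'T': 100},
    {'use_mpc': True, 'init_mpc': {'T': 5}},
)
```

Using `dict.get` with a default keeps older experiment configs, which predate the `use_mpc` key, working unchanged.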