cbfinn · avisingh599 · Nov 2, 2016 · Nov 2, 2016 · Nov 2, 2016 · Nov 2, 2016
diff --git a/docs/index.md b/docs/index.md
@@ -94,25 +94,11 @@ Here are the instructions for setting up [Pybox2D](https://github.com/pybox2d/py
 
 **MuJoCo Setup** (optional)
 
-In addition to the dependencies listed above, [OpenSceneGraph](http://www.openscenegraph.org/)(v3.0.1+) is also needed. It can be installed by running `sudo apt-get install openscenegraph libopenscenegraph-dev`.
 
-1. [Install MuJoCo](https://www.roboti.us/) (v1.22+) and place the downloaded `mjpro` directory into `gps/src/3rdparty`.
-MuJoCo is a high-quality physics engine and requires requires a license.
-Obtain a key, which should be named `mjkey.txt`, and place the key into the `mjpro` directory.
+1. [Install mujoco_py from OpenAI](https://github.com/openai/mujoco-py/) 
+Follow the instructions in the README of the above repository to set up MuJoCo and `mujoco_py`. MuJoCo is physics engine (which requires a license key) and mujoco_py are python bindings that wrap around MuJoCo's C/C++ API.
+2. Remeber to source `~/.bashrc` after you have completed the above installation. 
 
-2. Build `gps/src/3rdparty` by running:
-    ```sh
-    cd gps/build
-    cmake ../src/3rdparty
-    make -j
-    ```
-
-3. Set up paths by adding the following to your `~/.bashrc` file:
-    ```sh
-    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/gps/build/lib
-    export PYTHONPATH=$PYTHONPATH:/path/to/gps/build/lib
-    ```
-    Don't forget to run `source ~/.bashrc` afterward.
 
 **ROS Setup** (optional)
 

diff --git a/experiments/mjc_badmm_example/hyperparams.py b/experiments/mjc_badmm_example/hyperparams.py
@@ -16,7 +16,6 @@
 from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior
 from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
 from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython
-from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe
 from gps.algorithm.policy.lin_gauss_init import init_lqr
 from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM
 from gps.algorithm.policy.policy_prior import PolicyPrior
@@ -26,7 +25,7 @@
         END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, ACTION
 from gps.gui.config import generate_experiment_info
 
-ALGORITHM_NN_LIBRARY = "caffe"
+ALGORITHM_NN_LIBRARY = "tf"
 
 SENSOR_DIMS = {
     JOINT_ANGLES: 7,
@@ -64,15 +63,19 @@
     'substeps': 5,
     'conditions': common['conditions'],
     'pos_body_idx': np.array([1]),
-    'pos_body_offset': [np.array([0.1, 0.1, 0]), np.array([0.1, -0.1, 0]),
-                        np.array([-0.1, -0.1, 0]), np.array([-0.1, 0.1, 0])],
+    'pos_body_offset': [
+                        [np.array([0.1, 0.1, 0])],
+                        [np.array([0.1, -0.1, 0])],
+                        [np.array([-0.1, -0.1, 0])], 
+                        [np.array([-0.1, 0.1, 0])]
+                        ],
     'T': 100,
     'sensor_dims': SENSOR_DIMS,
     'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
                       END_EFFECTOR_POINT_VELOCITIES],
     'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
                     END_EFFECTOR_POINT_VELOCITIES],
-    'camera_pos': np.array([0., 0., 2., 0., 0.2, 0.5]),
+    'camera_pos': np.array([0., 0., 0., 1., -90., 90.]),
 }
 
 algorithm = {
@@ -163,6 +166,7 @@
         'network_model': example_tf_network
     }
 elif ALGORITHM_NN_LIBRARY == "caffe":
+    from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe
     algorithm['policy_opt'] = {
         'type': PolicyOptCaffe,
         'weights_file_prefix': EXP_DIR + 'policy',

diff --git a/experiments/mjc_example/hyperparams.py b/experiments/mjc_example/hyperparams.py
@@ -55,14 +55,19 @@
     'substeps': 5,
     'conditions': common['conditions'],
     'pos_body_idx': np.array([1]),
-    'pos_body_offset': [np.array([0, 0.2, 0]), np.array([0, 0.1, 0]),
-                        np.array([0, -0.1, 0]), np.array([0, -0.2, 0])],
+    'pos_body_offset': [
+                        [np.array([0.1, 0.1, 0])],
+                        [np.array([0.1, -0.1, 0])],
+                        [np.array([-0.1, -0.1, 0])], 
+                        [np.array([-0.1, 0.1, 0])]
+                        ],
     'T': 100,
     'sensor_dims': SENSOR_DIMS,
     'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
                       END_EFFECTOR_POINT_VELOCITIES],
     'obs_include': [],
-    'camera_pos': np.array([0., 0., 2., 0., 0.2, 0.5]),
+    'camera_pos': np.array([0., 0., 0., 1., -90., 90.]),
+
 }
 
 algorithm = {

diff --git a/experiments/mjc_mdgps_example/hyperparams.py b/experiments/mjc_mdgps_example/hyperparams.py
@@ -60,8 +60,12 @@
     'substeps': 5,
     'conditions': common['conditions'],
     'pos_body_idx': np.array([1]),
-    'pos_body_offset': [np.array([-0.08, -0.08, 0]), np.array([-0.08, 0.08, 0]),
-                        np.array([0.08, 0.08, 0]), np.array([0.08, -0.08, 0])],
+    'pos_body_offset': [
+                        [np.array([0.1, 0.1, 0])],
+                        [np.array([0.1, -0.1, 0])],
+                        [np.array([-0.1, -0.1, 0])], 
+                        [np.array([-0.1, 0.1, 0])]
+                        ],
     'T': 100,
     'sensor_dims': SENSOR_DIMS,
     'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,

diff --git a/experiments/mjc_peg_images/hyperparams.py b/experiments/mjc_peg_images/hyperparams.py
@@ -74,8 +74,12 @@
     'train_conditions': common['train_conditions'],
     'test_conditions': common['test_conditions'],
     'pos_body_idx': np.array([1]),
-    'pos_body_offset': [np.array([0.0, 0.12, 0]), np.array([0.0, -0.08, 0]),
-                        np.array([-0.2, -0.08, 0]), np.array([-0.2, 0.12, 0])],
+    'pos_body_offset': [
+                    [np.array([0.1, 0.1, 0])],
+                    [np.array([0.1, -0.1, 0])],
+                    [np.array([-0.1, -0.1, 0])], 
+                    [np.array([-0.1, 0.1, 0])]
+                    ],
     'T': 100,
     'sensor_dims': SENSOR_DIMS,
     'state_include': [JOINT_ANGLES, JOINT_VELOCITIES, END_EFFECTOR_POINTS,
@@ -85,7 +89,7 @@
     'image_width': IMAGE_WIDTH,
     'image_height': IMAGE_HEIGHT,
     'image_channels': IMAGE_CHANNELS,
-    'camera_pos': np.array([0., 0., 2., 0., 0.2, 0.5]),
+    'camera_pos': np.array([0., 0., 0., 2., -90, +90]),
 }
 
 algorithm = {

diff --git a/experiments/reacher_images/hyperparams.py b/experiments/reacher_images/hyperparams.py
@@ -17,7 +17,6 @@
 from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
 from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython
 from gps.algorithm.policy.lin_gauss_init import init_lqr, init_pd
-from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe
 from gps.algorithm.policy_opt.policy_opt_tf import PolicyOptTf
 from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM
 from gps.algorithm.policy_opt.tf_model_example import example_tf_network
@@ -57,7 +56,7 @@
 np.random.seed(14)
 pos_body_offset = []
 for _ in range(CONDITIONS):
-    pos_body_offset.append(np.array([0.4*np.random.rand()-0.3, 0.4*np.random.rand()-0.1, 0]))
+    pos_body_offset.append([np.array([0.4*np.random.rand()-0.3, 0.4*np.random.rand()-0.1, 0])])
 
 common = {
     'experiment_name': 'my_experiment' + '_' + \
@@ -93,7 +92,7 @@
     'image_height': IMAGE_HEIGHT,
     'image_channels': IMAGE_CHANNELS,
     'sensor_dims': SENSOR_DIMS,
-    'camera_pos': np.array([0., 0., 1.5, 0., 0., 0.]),
+    'camera_pos': np.array([0., 0., 0., 1., -90., 90.]),
     'record_reward': True,
 }
 
@@ -111,7 +110,7 @@
     'plot_dir': EXP_DIR,
     'agent_pos_body_idx': agent['pos_body_idx'],
     'agent_pos_body_offset': agent['pos_body_offset'],
-    'target_end_effector': [np.concatenate([np.array([.1, -.1, .01])+ agent['pos_body_offset'][i], np.array([0., 0., 0.])])
+    'target_end_effector': [np.concatenate([np.array([.1, -.1, .01])+ agent['pos_body_offset'][i][0], np.array([0., 0., 0.])])
                             for i in xrange(CONDITIONS)],
 }
 
@@ -150,7 +149,7 @@
 
 fk_cost_1 = [{
     'type': CostFK,
-    'target_end_effector': np.concatenate([np.array([.1, -.1, .01])+ agent['pos_body_offset'][i], np.array([0., 0., 0.])]),
+    'target_end_effector': np.concatenate([np.array([.1, -.1, .01])+ agent['pos_body_offset'][i][0], np.array([0., 0., 0.])]),
     'wp': np.array([1, 1, 1, 0, 0, 0]),
     'l1': 1.0,
     'l2': 0.0,
@@ -161,7 +160,7 @@
 algorithm['cost'] = [{
     'type': CostSum,
     'costs': [torque_cost_1[i], fk_cost_1[i]],
-    'weights': [2.0, 1.0],
+    'weights': [20.0, 1.0],
 }  for i in range(common['conditions'])]
 
 algorithm['dynamics'] = {

diff --git a/mjc_models/pr2_arm3d.xml b/mjc_models/pr2_arm3d.xml
@@ -8,6 +8,7 @@
     </default>
 
     <worldbody>
+        <light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/> 
         <body name="r_shoulder_pan_link" pos="0 -0.188 0">
             <geom name="e1" type="sphere" rgba="0.6 0.6 0.6 1" pos="-0.06 0.05 0.2" size="0.05" />
             <geom name="e2" type="sphere" rgba="0.6 0.6 0.6 1" pos=" 0.06 0.05 0.2" size="0.05" />

diff --git a/mjc_models/reacher.xml b/mjc_models/reacher.xml
@@ -6,6 +6,7 @@
 	</default>
 	<option gravity="0 0 -9.81" integrator="RK4" timestep="0.01"/>
 	<worldbody>
+		<light diffuse=".5 .5 .5" pos="0 0 2" dir="0 0 -1"/> 
 		<!-- Arena -->
 		<geom conaffinity="0" fromto="-.3 -.3 .01 .3 -.3 .01" name="sideS" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>
 		<geom conaffinity="0" fromto=" .3 -.3 .01 .3  .3 .01" name="sideE" rgba="0.9 0.4 0.6 1" size=".02" type="capsule"/>