Merge pull request cbfinn#49 from SymbioRobotics/master
Universal Robot functionality in Gazebo simulation for GPS.
ahundt committed Jan 2, 2017
2 parents 17220c0 + 5b335f0 commit ea527b8
Showing 9 changed files with 1,422 additions and 3 deletions.
43 changes: 42 additions & 1 deletion docs/index.md
@@ -116,7 +116,7 @@ Obtain a key, which should be named `mjkey.txt`, and place the key into the `mjp
**ROS Setup** (optional)
-1. Install [ROS](http://ros.org), including the standard [PR2 packages](http://wiki.ros.org/Robots/PR2)
+1. Install [ROS](http://ros.org), including the standard [PR2 packages](http://wiki.ros.org/Robots/PR2) and the [Universal Robot Stack](http://wiki.ros.org/Industrial/Install)
2. Set up paths by adding the following to your `~/.bashrc` file:
@@ -265,6 +265,47 @@ python python/gps/gps_main.py pr2_badmm_example

To learn how to make your own experiment and/or set your own initial and target positions, see [the next section](#running-a-new-experiment)

#### Simulated UR example

To run the code on a simulated UR, be sure to first follow the ROS setup instructions above.

You must also set an environment variable `UR_PATH` that tells GPS where the installed `universal_robot` package is located. You can do this by adding the following to your `~/.bashrc`:

```sh
export UR_PATH=/path/to/universal_robot
```

After adding this line, run:
```sh
source ~/.bashrc
```
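
To confirm the variable is set, you can check that it resolves and that the `ur_description` URDF directory the UR hyperparams files read from is present (for instance, `ur_caffe_example` loads `$UR_PATH/ur_description/urdf/ur10_robot.urdf`):

```sh
echo $UR_PATH
ls $UR_PATH/ur_description/urdf/
```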

###### 1. Start the controller

Note: You may find it helpful early on to edit the default `ur<5 or 10>.launch` file in the `ur_gazebo` package, changing the `-z` argument of the `spawn_gazebo_model` node from `0.1` to a larger number such as `5`. This spawns the simulated UR 5 meters above the ground rather than 0.1, removing the potential for the robot to interact with the ground.
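
For example, assuming the argument appears literally as `-z 0.1` in the launch file (the exact formatting may vary between `universal_robot` releases), a one-line edit that keeps a backup of the original file might look like this:

```sh
sed -i.bak 's/-z 0.1/-z 5/' $(rospack find ur_gazebo)/launch/ur10.launch
```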

Launch Gazebo and spawn a UR<5 or 10> inside it.

```sh
roslaunch ur_gazebo ur<5 or 10>.launch
```
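
Before running an experiment, you can confirm that the simulated arm controller is up. The GPS agent publishes commands to `/arm_controller/command` and reads state from `/arm_controller/state` (the topic names configured in the example hyperparams files), so both topics should appear:

```sh
rostopic list | grep arm_controller
```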

Now you're ready to run the examples via `gps_main.py`. This can be done on any machine as long as the `UR_PATH` and ROS environment variables are set appropriately.
The first example starts with a default initial controller and learns to move the end effector joint to a specified location.
Run the following from the gps directory:
```sh
python python/gps/gps_main.py ur_example
```
The second example trains a neural network policy to reach a goal pose from different starting positions, using guided policy search.
```sh
python python/gps/gps_main.py ur_caffe_example
```
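
Both examples write their results under the experiment directory named on the command line: iteration snapshots are written to its `data_files/` subdirectory, and the Caffe example also saves policy weights under the `policy` prefix (both locations are set in the experiment's `hyperparams.py`). For example:

```sh
ls experiments/ur_caffe_example/data_files/
```
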
#### Running a new experiment
1. Set up a new experiment directory by running:
314 changes: 314 additions & 0 deletions experiments/ur_caffe_example/hyperparams.py
@@ -0,0 +1,314 @@
# To get started, copy over hyperparams from another experiment.
# Visit rll.berkeley.edu/gps/hyperparams.html for documentation.
""" Hyperparameters for UR trajectory optimization experiment. """
from __future__ import division
import numpy as np
import os.path
import rospy
from datetime import datetime
from tf import TransformListener

from gps import __file__ as gps_filepath
from gps.agent.ur_ros.agent_ur import AgentURROS
from gps.algorithm.algorithm_badmm import AlgorithmBADMM
from gps.algorithm.cost.cost_fk import CostFK
from gps.algorithm.cost.cost_sum import CostSum
from gps.algorithm.cost.cost_utils import RAMP_LINEAR, RAMP_FINAL_ONLY
from gps.algorithm.dynamics.dynamics_lr_prior import DynamicsLRPrior
from gps.algorithm.dynamics.dynamics_prior_gmm import DynamicsPriorGMM
from gps.algorithm.policy_opt.policy_opt_caffe import PolicyOptCaffe
from gps.algorithm.traj_opt.traj_opt_lqr_python import TrajOptLQRPython
from gps.algorithm.policy.lin_gauss_init import init_lqr
from gps.algorithm.policy.policy_prior_gmm import PolicyPriorGMM
from gps.gui.target_setup_gui import load_pose_from_npz
from gps.proto.gps_pb2 import JOINT_ANGLES, JOINT_VELOCITIES, \
        END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES, ACTION, \
        TRIAL_ARM, JOINT_SPACE
from gps.utility.general_utils import get_ee_points, get_position
from gps.gui.config import generate_experiment_info

# Topics for the robot publisher and subscriber.
JOINT_PUBLISHER = '/arm_controller/command'
JOINT_SUBSCRIBER = '/arm_controller/state'

# 'SLOWNESS' is how far in the future (in seconds) position control extrapolates
# when it publishes actions for robot movement. 1.0-10.0 is fine for simulation.
SLOWNESS = 10.0
# 'RESET_SLOWNESS' is how long (in seconds) we tell the robot to take when
# returning to its start configuration.
RESET_SLOWNESS = 2.0

# Set constants for joints
SHOULDER_PAN_JOINT = 'shoulder_pan_joint'
SHOULDER_LIFT_JOINT = 'shoulder_lift_joint'
ELBOW_JOINT = 'elbow_joint'
WRIST_1_JOINT = 'wrist_1_joint'
WRIST_2_JOINT = 'wrist_2_joint'
WRIST_3_JOINT = 'wrist_3_joint'

# Set constants for links
BASE_LINK = 'base_link'
SHOULDER_LINK = 'shoulder_link'
UPPER_ARM_LINK = 'upper_arm_link'
FOREARM_LINK = 'forearm_link'
WRIST_1_LINK = 'wrist_1_link'
WRIST_2_LINK = 'wrist_2_link'
WRIST_3_LINK = 'wrist_3_link'

# Set end effector constants
INITIAL_JOINTS = [0, -np.pi/2, np.pi/2, 0, 0, 0]

# Set the number of goal points. 1 by default for a single end effector tip.
NUM_EE_POINTS = 1
EE_POINTS = np.array([[0, 0, 0]])

# Specify a goal state in Cartesian coordinates.
EE_POS_TGT = np.asmatrix([.70, .70, .50])
"""UR 10 Examples:
EE_POS_TGT = np.asmatrix([.29, .52, .62]) # Target where all joints are 0.
EE_POS_TGT = np.asmatrix([.65, .80, .30]) # Target in positive octant near ground.
EE_POS_TGT = np.asmatrix([.70, .70, .50]) # Target in positive octant used for debugging convergence.
The Gazebo sim converges to the above point with non-action costs:
(-589.75, -594.71, -599.54, -601.54, -602.75, -603.28, -604.28, -604.79, -605.55, -606.29)
Distance from Goal: (0.014, 0.005, -0.017)
"""

# Set to identity unless you want the goal to have a certain orientation.
EE_ROT_TGT = np.asmatrix([[1, 0, 0], [0, 1, 0], [0, 0, 1]])

# Only edit these when editing the robot joints and links.
# The lengths of these arrays define numerous parameters in GPS.
JOINT_ORDER = [SHOULDER_PAN_JOINT, SHOULDER_LIFT_JOINT, ELBOW_JOINT,
               WRIST_1_JOINT, WRIST_2_JOINT, WRIST_3_JOINT]
LINK_NAMES = [BASE_LINK, SHOULDER_LINK, UPPER_ARM_LINK, FOREARM_LINK,
              WRIST_1_LINK, WRIST_2_LINK, WRIST_3_LINK]

# Hyperparameters to be tuned for optimizing policy learning on the specific robot.
UR_GAINS = np.array([1, 1, 1, 1, 1, 1])

# Packaging sensor dimensional data for reference.
SENSOR_DIMS = {
    JOINT_ANGLES: len(JOINT_ORDER),
    JOINT_VELOCITIES: len(JOINT_ORDER),
    END_EFFECTOR_POINTS: NUM_EE_POINTS * EE_POINTS.shape[1],
    END_EFFECTOR_POINT_VELOCITIES: NUM_EE_POINTS * EE_POINTS.shape[1],
    ACTION: len(UR_GAINS),
}
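# With the six UR joints and a single end effector point, this works out to:
# JOINT_ANGLES = 6, JOINT_VELOCITIES = 6, END_EFFECTOR_POINTS = 3,
# END_EFFECTOR_POINT_VELOCITIES = 3, ACTION = 6.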

# States to check in agent._process_observations.
STATE_TYPES = {'positions': JOINT_ANGLES,
               'velocities': JOINT_VELOCITIES}

# Path to urdf of robot.
TREE_PATH = os.environ['UR_PATH'] + '/ur_description/urdf/ur10_robot.urdf'

# Be sure to specify the correct experiment directory in which to save policy data.
BASE_DIR = '/'.join(str.split(gps_filepath, '/')[:-2])
EXP_DIR = BASE_DIR + '/../experiments/ur_caffe_example/'

# Set the number of seconds per step of a sample.
TIMESTEP = 0.01 # Typically 0.01.
# Set the number of timesteps per sample.
STEP_COUNT = 100 # Typically 100.
# Set the number of samples per condition.
SAMPLE_COUNT = 5 # Typically 5.
# Set the number of conditions per iteration.
CONDITIONS = 2 # Typically 2 for Caffe and 1 for LQR.
# Set the number of trajectory iterations to collect.
ITERATIONS = 10 # Typically 10.
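# With the defaults above, each iteration collects SAMPLE_COUNT * CONDITIONS =
# 10 training samples (100 over the full run), and each sample spans
# STEP_COUNT * TIMESTEP = 1 second of trajectory time.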

x0s = []
ee_tgts = []
reset_conditions = []

common = {
    'experiment_name': 'my_experiment' + '_' + \
        datetime.strftime(datetime.now(), '%m-%d-%y_%H-%M'),
    'experiment_dir': EXP_DIR,
    'data_files_dir': EXP_DIR + 'data_files/',
    'target_filename': EXP_DIR + 'target.npz',
    'log_filename': EXP_DIR + 'log.txt',
    'conditions': CONDITIONS,
}

# Set up each condition.
for i in xrange(common['conditions']):

    # Use hardcoded default values for initial and target locations.
    ja_x0 = np.zeros(SENSOR_DIMS[JOINT_ANGLES])
    ee_pos_x0 = np.zeros((1, 3))
    ee_rot_x0 = np.zeros((3, 3))

    ee_pos_tgt = EE_POS_TGT
    ee_rot_tgt = EE_ROT_TGT

    state_space = sum(SENSOR_DIMS.values()) - SENSOR_DIMS[ACTION]
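    # With the SENSOR_DIMS above, this is 6 + 6 + 3 + 3 = 18 state dimensions;
    # ACTION is part of SENSOR_DIMS but not of the state.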

    joint_dim = SENSOR_DIMS[JOINT_ANGLES] + SENSOR_DIMS[JOINT_VELOCITIES]

    # Initialized to start position; initial velocities are 0.
    x0 = np.zeros(state_space)
    x0[:SENSOR_DIMS[JOINT_ANGLES]] = ja_x0

    # The need for this node will go away upon migration to KDL.
    rospy.init_node('gps_agent_ur_ros_node')

    # Set starting end effector position using TF.
    tf = TransformListener()

    # Sleep for 1 second to give the node a chance to kick off.
    rospy.sleep(1)
    time = tf.getLatestCommonTime(WRIST_3_LINK, BASE_LINK)

    x0[joint_dim:(joint_dim + NUM_EE_POINTS * EE_POINTS.shape[1])] = get_position(tf, WRIST_3_LINK, BASE_LINK, time)
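    # The state is laid out as [joint angles (6), joint velocities (6),
    # end effector point (3), end effector point velocities (3)], so the
    # slice above (indices 12:15) holds the starting end effector position
    # read from TF.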

    # Initialize target end effector position.
    ee_tgt = np.ndarray.flatten(
        get_ee_points(EE_POINTS, ee_pos_tgt, ee_rot_tgt).T
    )

    reset_condition = {
        JOINT_ANGLES: INITIAL_JOINTS,
        JOINT_VELOCITIES: []
    }

    x0s.append(x0)
    ee_tgts.append(ee_tgt)
    reset_conditions.append(reset_condition)


if not os.path.exists(common['data_files_dir']):
    os.makedirs(common['data_files_dir'])

agent = {
    'type': AgentURROS,
    'dt': TIMESTEP,
    'dU': SENSOR_DIMS[ACTION],
    'conditions': common['conditions'],
    'T': STEP_COUNT,
    'x0': x0s,
    'ee_points_tgt': ee_tgts,
    'reset_conditions': reset_conditions,
    'sensor_dims': SENSOR_DIMS,
    'joint_order': JOINT_ORDER,
    'link_names': LINK_NAMES,
    'state_types': STATE_TYPES,
    'tree_path': TREE_PATH,
    'joint_publisher': JOINT_PUBLISHER,
    'joint_subscriber': JOINT_SUBSCRIBER,
    'slowness': SLOWNESS,
    'reset_slowness': RESET_SLOWNESS,
    'state_include': [JOINT_ANGLES, JOINT_VELOCITIES,
                      END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES],
    'end_effector_points': [EE_POINTS],
    'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES,
                    END_EFFECTOR_POINTS, END_EFFECTOR_POINT_VELOCITIES],
}

algorithm = {
    'type': AlgorithmBADMM,
    'conditions': common['conditions'],
    'iterations': ITERATIONS,
    'lg_step_schedule': np.array([1e-4, 1e-3, 1e-2, 1e-1]),
    'policy_dual_rate': 0.1,
    'ent_reg_schedule': np.array([1e-3, 1e-3, 1e-2, 1e-1]),
    'fixed_lg_step': 3,
    'kl_step': 5.0,
    'init_pol_wt': 0.01,
    'min_step_mult': 0.01,
    'max_step_mult': 1.0,
    'sample_decrease_var': 0.05,
    'sample_increase_var': 0.1,
    'exp_step_increase': 2.0,
    'exp_step_decrease': 0.5,
    'exp_step_upper': 0.5,
    'exp_step_lower': 1.0,
    'max_policy_samples': 6,
    'policy_sample_mode': 'add',
}

algorithm['init_traj_distr'] = {
    'type': init_lqr,
    'init_gains': 1.0 / UR_GAINS,
    'init_acc': np.zeros(SENSOR_DIMS[ACTION]),
    'init_var': 1.0,
    'stiffness': 0.5,
    'stiffness_vel': 0.25,
    'final_weight': 50,
    'dt': agent['dt'],
    'T': agent['T'],
}

# This cost function takes into account the distance between the end effector's
# current and target positions, weighted in a linearly increasing fashion
# as the timestep grows from 0 to T-1.
fk_cost_ramp = {
    'type': CostFK,
    # Target end effector is subtracted out of EE_POINTS in the PR2 C++ plugin,
    # so the goal is 0. The UR agent also subtracts this out for consistency.
    'target_end_effector': [np.zeros(NUM_EE_POINTS * EE_POINTS.shape[1])],
    'wp': np.ones(SENSOR_DIMS[END_EFFECTOR_POINTS]),
    'l1': 0.1,
    'l2': 0.0001,
    'ramp_option': RAMP_LINEAR,
}

# This cost function takes into account the distance between the end effector's
# current and target positions at time T-1 only.
fk_cost_final = {
    'type': CostFK,
    'target_end_effector': np.zeros(NUM_EE_POINTS * EE_POINTS.shape[1]),
    'wp': np.ones(SENSOR_DIMS[END_EFFECTOR_POINTS]),
    'l1': 1.0,
    'l2': 0.0,
    'wp_final_multiplier': 100.0,  # Weight multiplier on final timestep.
    'ramp_option': RAMP_FINAL_ONLY,
}

# Combines the cost functions in 'costs' to produce a single cost function.
algorithm['cost'] = {
    'type': CostSum,
    'costs': [fk_cost_ramp, fk_cost_final],
    'weights': [1.0, 1.0],
}

algorithm['dynamics'] = {
    'type': DynamicsLRPrior,
    'regularization': 1e-6,
    'prior': {
        'type': DynamicsPriorGMM,
        'max_clusters': 20,
        'min_samples_per_cluster': 40,
        'max_samples': 20,
    },
}

algorithm['traj_opt'] = {
    'type': TrajOptLQRPython,
}

algorithm['policy_opt'] = {
    'type': PolicyOptCaffe,
    'weights_file_prefix': EXP_DIR + 'policy',
    'iterations': 3000,
    'dU': SENSOR_DIMS[ACTION],
}

algorithm['policy_prior'] = {
    'type': PolicyPriorGMM,
    'max_clusters': 20,
    'min_samples_per_cluster': 40,
    'max_samples': 40,
}

config = {
    'iterations': algorithm['iterations'],
    'common': common,
    'verbose_trials': 0,
    'verbose_policy_trials': 1,
    'agent': agent,
    'gui_on': True,
    'algorithm': algorithm,
    'num_samples': SAMPLE_COUNT,
}

common['info'] = generate_experiment_info(config)