Merge of 76b75ce

PiperOrigin-RevId: 505088236 Change-Id: I54aeb33b22050bb9910de680f3111b6fb0ac7fff
MacroJou · Jan 27, 2023 · ef5647c · ef5647c
2 parents fda7e3f + 76b75ce
commit ef5647c
Show file tree

Hide file tree

Showing 19 changed files with 259 additions and 135 deletions.
diff --git a/README.md b/README.md
@@ -16,7 +16,11 @@
 MJPC allows the user to easily author and solve complex robotics tasks, and currently supports three shooting-based planners: derivative-based iLQG and Gradient Descent, and a simple yet very competitive derivative-free method called Predictive Sampling.
 
 - [Overview](#overview)
+- [Graphical User Interface](#graphical-user-interface)
 - [Installation](#installation)
+  - [macOS](#macos)
+  - [Ubuntu](#ubuntu)
+  - [Build Issues](#build-issues)
 - [Predictive Control](#predictive-control)
 - [Contributing](#contributing)
 - [Known Issues](#known-issues)
@@ -36,6 +40,8 @@ For a longer talk at the MIT Robotics Seminar describing our results, click belo
 
 [![Talk](http://img.youtube.com/vi/2xVN-qY78P4/hqdefault.jpg)](https://www.youtube.com/watch?v=2xVN-qY78P4)
 
+## Graphical User Interface
+
 For a detailed dive of the graphical user interface, see the [MJPC GUI](docs/GUI.md) documentation.
 
 ## Installation

diff --git a/docs/GUI.md b/docs/GUI.md
@@ -2,9 +2,26 @@
 
 - [Graphical User Interface](#graphical-user-interface)
   - [Overview](#overview)
+  - [User Guide](#user-guide)
 
 ## Overview
 
 The MJPC GUI is built on top of MuJoCo's `simulate` application with a few additional features. The below screenshot shows a capture of the GUI for the `walker` task.
 
 ![GUI](assets/gui.png)
+
+## User Guide
+
+- Press `F1` to bring up a help pane that describes how to use the GUI.
+- The MJPC GUI is an extension of MuJoCo's native `simulate` viewer, with the same keyboard shortcuts and mouse functionality.
+    - `+` speeds up the simulation, resulting in fewer planning steps per simulation step.
+    - `-` slows down the simulation, resulting in more planning steps per simulation step.
+- The `simulate` viewer enables drag-and-drop interaction with simulated objects to apply forces or torques.
+    - Double-click on a body to select it.
+    - `Ctrl + left drag` applies a torque to the selected object, resulting in rotation.
+    - `Ctrl + right drag` applies a force to the selected object in the `(x,z)` plane, resulting in translation.
+    - `Ctrl + Shift + right drag` applies a force to the selected object in the `(x,y)` plane.
+- MJPC adds three keyboard shortcuts:
+    - The `Enter` key starts and stops the planner.
+    - The `\` key starts and stops the controller (sending actions from the planner to the model).
+    - The `9` key turns the traces on/off.
diff --git a/docs/OVERVIEW.md b/docs/OVERVIEW.md
@@ -224,7 +224,7 @@ The swimmer's cost has two terms:
 
 The repository includes additional example tasks:
 
-- Humanoid [Stand](../mjpc/tasks/humanoid/task_stand.xml) | [Walk](../mjpc/tasks/humanoid/task_walk.xml)
+- Humanoid [Stand](../mjpc/tasks/humanoid/stand/task.xml) | [Walk](../mjpc/tasks/humanoid/walk/task.xml)
 - Quadruped [Terrain](../mjpc/tasks/quadruped/task_hill.xml) | [Flat](../mjpc/tasks/quadruped/task_flat.xml)
 - [Walker](../mjpc/tasks/walker/task.xml)
 - [In-Hand Manipulation](../mjpc/tasks/hand/task.xml)

diff --git a/mjpc/CMakeLists.txt b/mjpc/CMakeLists.txt
@@ -230,8 +230,10 @@ add_library(
   tasks/cartpole/cartpole.h
   tasks/hand/hand.cc
   tasks/hand/hand.h
-  tasks/humanoid/humanoid.cc
-  tasks/humanoid/humanoid.h
+  tasks/humanoid/stand/task.cc
+  tasks/humanoid/stand/task.h
+  tasks/humanoid/walk/task.cc
+  tasks/humanoid/walk/task.h
   tasks/panda/panda.cc
   tasks/panda/panda.h
   tasks/particle/particle.cc

diff --git a/mjpc/agent.cc b/mjpc/agent.cc
@@ -36,6 +36,10 @@ inline constexpr double kMinTimeStep = 1.0e-4;
 inline constexpr double kMaxTimeStep = 0.1;
 inline constexpr double kMinPlanningHorizon = 1.0e-5;
 inline constexpr double kMaxPlanningHorizon = 2.5;
+
+// maximum number of actions to plot
+const int kMaxActionPlots = 25;
+
 }  // namespace
 
 // initialize data, settings, planners, states
@@ -502,28 +506,30 @@ void Agent::PlotInitialize() {
   }
 
   // history of control
-  for (int i = 0; i < model_->nu; i++) {
+  int dim_action = mju_min(model_->nu, kMaxActionPlots);
+
+  for (int i = 0; i < dim_action; i++) {
     plots_.action.linergb[i][0] = 0.0f;
     plots_.action.linergb[i][1] = 1.0f;
     plots_.action.linergb[i][2] = 1.0f;
   }
 
   // best control
-  for (int i = 0; i < model_->nu; i++) {
-    plots_.action.linergb[model_->nu + i][0] = 1.0f;
-    plots_.action.linergb[model_->nu + i][1] = 0.0f;
-    plots_.action.linergb[model_->nu + i][2] = 1.0f;
+  for (int i = 0; i < dim_action; i++) {
+    plots_.action.linergb[dim_action + i][0] = 1.0f;
+    plots_.action.linergb[dim_action + i][1] = 0.0f;
+    plots_.action.linergb[dim_action + i][2] = 1.0f;
   }
 
   // current line
-  plots_.action.linergb[2 * model_->nu][0] = 1.0f;
-  plots_.action.linergb[2 * model_->nu][1] = 0.647f;
-  plots_.action.linergb[2 * model_->nu][2] = 0.0f;
+  plots_.action.linergb[2 * dim_action][0] = 1.0f;
+  plots_.action.linergb[2 * dim_action][1] = 0.647f;
+  plots_.action.linergb[2 * dim_action][2] = 0.0f;
 
   // policy line
-  plots_.action.linergb[2 * model_->nu + 1][0] = 1.0f;
-  plots_.action.linergb[2 * model_->nu + 1][1] = 0.647f;
-  plots_.action.linergb[2 * model_->nu + 1][2] = 0.0f;
+  plots_.action.linergb[2 * dim_action + 1][0] = 1.0f;
+  plots_.action.linergb[2 * dim_action + 1][1] = 0.647f;
+  plots_.action.linergb[2 * dim_action + 1][2] = 0.0f;
 
   // history of agent compute time
   plots_.timer.linergb[0][0] = 0.0f;
@@ -575,7 +581,7 @@ void Agent::PlotReset() {
   }
 
   // action reset
-  for (int j = 0; j < 2 * model_->nu + 2; j++) {
+  for (int j = 0; j < 2 * mju_min(model_->nu, kMaxActionPlots) + 2; j++) {
     PlotResetData(&plots_.action, 1000, j);
   }
 
@@ -666,37 +672,39 @@ void Agent::Plots(const mjData* data, int shift) {
   // ----- action ----- //
   double action_bounds[2] = {-1.0, 1.0};
 
+  int dim_action = mju_min(model_->nu, kMaxActionPlots);
+
   // shift data
   if (shift) {
     // agent history
-    for (int j = 0; j < model_->nu; j++) {
+    for (int j = 0; j < dim_action; j++) {
       PlotUpdateData(&plots_.action, action_bounds, data->time, data->ctrl[j],
                      1000, j, 1, 1, time_lower_bound);
     }
   }
 
   // agent actions
   PlotData(&plots_.action, action_bounds, winner->times.data(),
-           winner->actions.data(), model_->nu, model_->nu, winner->horizon,
-           model_->nu, time_lower_bound);
+           winner->actions.data(), model_->nu, dim_action, winner->horizon,
+           dim_action, time_lower_bound);
 
   // set final action for visualization
-  for (int j = 0; j < model_->nu; j++) {
+  for (int j = 0; j < dim_action; j++) {
     // set data
     if (winner->horizon > 1) {
-      plots_.action.linedata[model_->nu + j][2 * (winner->horizon - 1) + 1] =
+      plots_.action.linedata[dim_action + j][2 * (winner->horizon - 1) + 1] =
           winner->actions[(winner->horizon - 2) * model_->nu + j];
     } else {
-      plots_.action.linedata[model_->nu + j][2 * (winner->horizon - 1) + 1] = 0;
+      plots_.action.linedata[dim_action + j][2 * (winner->horizon - 1) + 1] = 0;
     }
   }
 
   // vertical lines at current time and agent time
   PlotVertical(&plots_.action, data->time, action_bounds[0], action_bounds[1],
-               10, 2 * model_->nu);
+               10, 2 * dim_action);
   PlotVertical(&plots_.action,
                (winner->times[0] > 0.0 ? winner->times[0] : data->time),
-               action_bounds[0], action_bounds[1], 10, 2 * model_->nu + 1);
+               action_bounds[0], action_bounds[1], 10, 2 * dim_action + 1);
 
   // ranges
   plots_.action.range[0][0] = data->time - horizon_ + model_->opt.timestep;
@@ -706,7 +714,7 @@ void Agent::Plots(const mjData* data, int shift) {
 
   // legend
   mju::strcpy_arr(plots_.action.linename[0], "History");
-  mju::strcpy_arr(plots_.action.linename[model_->nu], "Prediction");
+  mju::strcpy_arr(plots_.action.linename[dim_action], "Prediction");
 
   // ----- planner ----- //
 

diff --git a/mjpc/planners/gradient/planner.cc b/mjpc/planners/gradient/planner.cc
@@ -53,7 +53,7 @@ void GradientPlanner::Initialize(mjModel* model, const Task& task) {
       2 * model->nv + model->na;    // state derivative dimension
   dim_action = model->nu;           // action dimension
   dim_sensor = model->nsensordata;  // number of sensor values
-  dim_max = 10 * mju_max(mju_max(mju_max(dim_state, dim_state_derivative),
+  dim_max = mju_max(mju_max(mju_max(dim_state, dim_state_derivative),
                                  dim_action),
                          model->nuser_sensor);
   num_trajectory = GetNumberOrDefault(32, model, "gradient_num_trajectory");

diff --git a/mjpc/planners/ilqg/backward_pass.cc b/mjpc/planners/ilqg/backward_pass.cc
@@ -170,7 +170,7 @@ int iLQGBackwardPass::RiccatiStep(
                           boxqp.H.data(), boxqp.g.data(), m, boxqp.lower.data(),
                           boxqp.upper.data());
     if (mFree < 0) {
-      // printf("backward_pass failure\n");
+      printf("backward_pass failure\n");
       return 0;
     }
 
@@ -198,7 +198,12 @@ int iLQGBackwardPass::RiccatiStep(
   } else {
     // Quut^-1
     mju_copy(tmp3, Quu_reg, m * m);
-    mju_cholFactor(tmp3, m, 0.0);
+    int rank = mju_cholFactor(tmp3, m, 0.0);
+
+    if (rank < m) {
+      printf("backward pass failure\n");
+      return 0;
+    }
 
     // Kt = - Quut \ Qxut
     for (int i = 0; i < n; i++) {

diff --git a/mjpc/planners/ilqg/planner.cc b/mjpc/planners/ilqg/planner.cc
@@ -51,7 +51,7 @@ void iLQGPlanner::Initialize(mjModel* model, const Task& task) {
       2 * model->nv + model->na;    // state derivative dimension
   dim_action = model->nu;           // action dimension
   dim_sensor = model->nsensordata;  // number of sensor values
-  dim_max = 10 * mju_max(mju_max(mju_max(dim_state, dim_state_derivative),
+  dim_max = mju_max(mju_max(mju_max(dim_state, dim_state_derivative),
                                  dim_action),
                          model->nuser_sensor);
   num_trajectory = GetNumberOrDefault(10, model, "ilqg_num_rollouts");

diff --git a/mjpc/tasks/humanoid/stand/task.cc b/mjpc/tasks/humanoid/stand/task.cc
@@ -0,0 +1,101 @@
+// Copyright 2022 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tasks/humanoid/stand/task.h"
+
+#include <iostream>
+
+#include <mujoco/mujoco.h>
+#include "utilities.h"
+
+
+namespace mjpc {
+
+// ------------------ Residuals for humanoid stand task ------------
+//   Number of residuals: 6
+//     Residual (0): Desired height
+//     Residual (1): Balance: COM_xy - average(feet position)_xy
+//     Residual (2): Com Vel: should be 0 and equal feet average vel
+//     Residual (3): Control: minimise control
+//     Residual (4): Joint vel: minimise joint velocity
+//   Number of parameters: 1
+//     Parameter (0): height_goal
+// ----------------------------------------------------------------
+void humanoid::Stand::Residual(const double* parameters, const mjModel* model,
+                               const mjData* data, double* residual) {
+  int counter = 0;
+
+  // ----- Height: head feet vertical error ----- //
+
+  // feet sensor positions
+  double* f1_position = mjpc::SensorByName(model, data, "sp0");
+  double* f2_position = mjpc::SensorByName(model, data, "sp1");
+  double* f3_position = mjpc::SensorByName(model, data, "sp2");
+  double* f4_position = mjpc::SensorByName(model, data, "sp3");
+  double* head_position = mjpc::SensorByName(model, data, "head_position");
+  double head_feet_error =
+      head_position[2] - 0.25 * (f1_position[2] + f2_position[2] +
+                                 f3_position[2] + f4_position[2]);
+  residual[counter++] = head_feet_error - parameters[0];
+
+  // ----- Balance: CoM-feet xy error ----- //
+
+  // capture point
+  double* com_position = mjpc::SensorByName(model, data, "torso_subtreecom");
+  double* com_velocity = mjpc::SensorByName(model, data, "torso_subtreelinvel");
+  double kFallTime = 0.2;
+  double capture_point[3] = {com_position[0], com_position[1], com_position[2]};
+  mju_addToScl3(capture_point, com_velocity, kFallTime);
+
+  // average feet xy position
+  double fxy_avg[2] = {0.0};
+  mju_addTo(fxy_avg, f1_position, 2);
+  mju_addTo(fxy_avg, f2_position, 2);
+  mju_addTo(fxy_avg, f3_position, 2);
+  mju_addTo(fxy_avg, f4_position, 2);
+  mju_scl(fxy_avg, fxy_avg, 0.25, 2);
+
+  mju_subFrom(fxy_avg, capture_point, 2);
+  double com_feet_distance = mju_norm(fxy_avg, 2);
+  residual[counter++] = com_feet_distance;
+
+  // ----- COM xy velocity should be 0 ----- //
+  mju_copy(&residual[counter], com_velocity, 2);
+  counter += 2;
+
+  // ----- joint velocity ----- //
+  mju_copy(residual + counter, data->qvel + 6, model->nv - 6);
+  counter += model->nv - 6;
+
+  // ----- action ----- //
+  mju_copy(&residual[counter], data->ctrl, model->nu);
+  counter += model->nu;
+
+  // sensor dim sanity check
+  // TODO: use this pattern everywhere and make this a utility function
+  int user_sensor_dim = 0;
+  for (int i = 0; i < model->nsensor; i++) {
+    if (model->sensor_type[i] == mjSENS_USER) {
+      user_sensor_dim += model->sensor_dim[i];
+    }
+  }
+  if (user_sensor_dim != counter) {
+    mju_error_i(
+        "mismatch between total user-sensor dimension "
+        "and actual length of residual %d",
+        counter);
+  }
+}
+
+}  // namespace mjpc
diff --git a/mjpc/tasks/humanoid/stand/task.h b/mjpc/tasks/humanoid/stand/task.h
@@ -0,0 +1,44 @@
+// Copyright 2022 DeepMind Technologies Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MJPC_TASKS_HUMANOID_STAND_TASK_H_
+#define MJPC_TASKS_HUMANOID_STAND_TASK_H_
+
+#include <mujoco/mujoco.h>
+
+namespace mjpc {
+namespace humanoid {
+
+struct Stand {
+
+  // ------------------ Residuals for humanoid stand task ------------
+  //   Number of residuals: 6
+  //     Residual (0): control
+  //     Residual (1): COM_xy - average(feet position)_xy
+  //     Residual (2): torso_xy - COM_xy
+  //     Residual (3): head_z - feet^{(i)}_position_z - height_goal
+  //     Residual (4): velocity COM_xy
+  //     Residual (5): joint velocity
+  //   Number of parameters: 1
+  //     Parameter (0): height_goal
+  // ----------------------------------------------------------------
+  static void Residual(const double* parameters, const mjModel* model,
+                       const mjData* data, double* residual);
+
+};
+
+}  // namespace humanoid
+}  // namespace mjpc
+
+#endif  // MJPC_TASKS_HUMANOID_STAND_TASK_H_
diff --git a/mjpc/tasks/humanoid/task_stand.xml → mjpc/tasks/humanoid/stand/task.xml b/mjpc/tasks/humanoid/task_stand.xml → mjpc/tasks/humanoid/stand/task.xml
@@ -1,6 +1,6 @@
 <mujoco model="Humanoid">
-  <include file="../common.xml"/>
-  <include file="humanoid.xml" />
+  <include file="../../common.xml"/>
+  <include file="../humanoid.xml" />
   <size memory="400K"/>
 
   <custom>