Merge pull request google-deepmind#58 from thowell/main

Minor fixes
MacroJou · Jan 26, 2023 · 76b75ce · 76b75ce
2 parents 67af331 + b54ab8e
commit 76b75ce
Show file tree

Hide file tree

Showing 8 changed files with 55 additions and 23 deletions.
diff --git a/mjpc/agent.cc b/mjpc/agent.cc
@@ -36,6 +36,10 @@ inline constexpr double kMinTimeStep = 1.0e-4;
 inline constexpr double kMaxTimeStep = 0.1;
 inline constexpr double kMinPlanningHorizon = 1.0e-5;
 inline constexpr double kMaxPlanningHorizon = 2.5;
+
+// maximum number of actions to plot
+const int kMaxActionPlots = 25;
+
 }  // namespace
 
 // initialize data, settings, planners, states
@@ -502,28 +506,30 @@ void Agent::PlotInitialize() {
   }
 
   // history of control
-  for (int i = 0; i < model_->nu; i++) {
+  int dim_action = mju_min(model_->nu, kMaxActionPlots);
+
+  for (int i = 0; i < dim_action; i++) {
     plots_.action.linergb[i][0] = 0.0f;
     plots_.action.linergb[i][1] = 1.0f;
     plots_.action.linergb[i][2] = 1.0f;
   }
 
   // best control
-  for (int i = 0; i < model_->nu; i++) {
-    plots_.action.linergb[model_->nu + i][0] = 1.0f;
-    plots_.action.linergb[model_->nu + i][1] = 0.0f;
-    plots_.action.linergb[model_->nu + i][2] = 1.0f;
+  for (int i = 0; i < dim_action; i++) {
+    plots_.action.linergb[dim_action + i][0] = 1.0f;
+    plots_.action.linergb[dim_action + i][1] = 0.0f;
+    plots_.action.linergb[dim_action + i][2] = 1.0f;
   }
 
   // current line
-  plots_.action.linergb[2 * model_->nu][0] = 1.0f;
-  plots_.action.linergb[2 * model_->nu][1] = 0.647f;
-  plots_.action.linergb[2 * model_->nu][2] = 0.0f;
+  plots_.action.linergb[2 * dim_action][0] = 1.0f;
+  plots_.action.linergb[2 * dim_action][1] = 0.647f;
+  plots_.action.linergb[2 * dim_action][2] = 0.0f;
 
   // policy line
-  plots_.action.linergb[2 * model_->nu + 1][0] = 1.0f;
-  plots_.action.linergb[2 * model_->nu + 1][1] = 0.647f;
-  plots_.action.linergb[2 * model_->nu + 1][2] = 0.0f;
+  plots_.action.linergb[2 * dim_action + 1][0] = 1.0f;
+  plots_.action.linergb[2 * dim_action + 1][1] = 0.647f;
+  plots_.action.linergb[2 * dim_action + 1][2] = 0.0f;
 
   // history of agent compute time
   plots_.timer.linergb[0][0] = 0.0f;
@@ -575,7 +581,7 @@ void Agent::PlotReset() {
   }
 
   // action reset
-  for (int j = 0; j < 2 * model_->nu + 2; j++) {
+  for (int j = 0; j < 2 * mju_min(model_->nu, kMaxActionPlots) + 2; j++) {
     PlotResetData(&plots_.action, 1000, j);
   }
 
@@ -666,37 +672,39 @@ void Agent::Plots(const mjData* data, int shift) {
   // ----- action ----- //
   double action_bounds[2] = {-1.0, 1.0};
 
+  int dim_action = mju_min(model_->nu, kMaxActionPlots);
+
   // shift data
   if (shift) {
     // agent history
-    for (int j = 0; j < model_->nu; j++) {
+    for (int j = 0; j < dim_action; j++) {
       PlotUpdateData(&plots_.action, action_bounds, data->time, data->ctrl[j],
                      1000, j, 1, 1, time_lower_bound);
     }
   }
 
   // agent actions
   PlotData(&plots_.action, action_bounds, winner->times.data(),
-           winner->actions.data(), model_->nu, model_->nu, winner->horizon,
-           model_->nu, time_lower_bound);
+           winner->actions.data(), model_->nu, dim_action, winner->horizon,
+           dim_action, time_lower_bound);
 
   // set final action for visualization
-  for (int j = 0; j < model_->nu; j++) {
+  for (int j = 0; j < dim_action; j++) {
     // set data
     if (winner->horizon > 1) {
-      plots_.action.linedata[model_->nu + j][2 * (winner->horizon - 1) + 1] =
+      plots_.action.linedata[dim_action + j][2 * (winner->horizon - 1) + 1] =
           winner->actions[(winner->horizon - 2) * model_->nu + j];
     } else {
-      plots_.action.linedata[model_->nu + j][2 * (winner->horizon - 1) + 1] = 0;
+      plots_.action.linedata[dim_action + j][2 * (winner->horizon - 1) + 1] = 0;
     }
   }
 
   // vertical lines at current time and agent time
   PlotVertical(&plots_.action, data->time, action_bounds[0], action_bounds[1],
-               10, 2 * model_->nu);
+               10, 2 * dim_action);
   PlotVertical(&plots_.action,
                (winner->times[0] > 0.0 ? winner->times[0] : data->time),
-               action_bounds[0], action_bounds[1], 10, 2 * model_->nu + 1);
+               action_bounds[0], action_bounds[1], 10, 2 * dim_action + 1);
 
   // ranges
   plots_.action.range[0][0] = data->time - horizon_ + model_->opt.timestep;
@@ -706,7 +714,7 @@ void Agent::Plots(const mjData* data, int shift) {
 
   // legend
   mju::strcpy_arr(plots_.action.linename[0], "History");
-  mju::strcpy_arr(plots_.action.linename[model_->nu], "Prediction");
+  mju::strcpy_arr(plots_.action.linename[dim_action], "Prediction");
 
   // ----- planner ----- //
 

diff --git a/mjpc/planners/gradient/planner.cc b/mjpc/planners/gradient/planner.cc
@@ -53,7 +53,7 @@ void GradientPlanner::Initialize(mjModel* model, const Task& task) {
       2 * model->nv + model->na;    // state derivative dimension
   dim_action = model->nu;           // action dimension
   dim_sensor = model->nsensordata;  // number of sensor values
-  dim_max = 10 * mju_max(mju_max(mju_max(dim_state, dim_state_derivative),
+  dim_max = mju_max(mju_max(mju_max(dim_state, dim_state_derivative),
                                  dim_action),
                          model->nuser_sensor);
   num_trajectory = GetNumberOrDefault(32, model, "gradient_num_trajectory");

diff --git a/mjpc/planners/ilqg/planner.cc b/mjpc/planners/ilqg/planner.cc
@@ -51,7 +51,7 @@ void iLQGPlanner::Initialize(mjModel* model, const Task& task) {
       2 * model->nv + model->na;    // state derivative dimension
   dim_action = model->nu;           // action dimension
   dim_sensor = model->nsensordata;  // number of sensor values
-  dim_max = 10 * mju_max(mju_max(mju_max(dim_state, dim_state_derivative),
+  dim_max = mju_max(mju_max(mju_max(dim_state, dim_state_derivative),
                                  dim_action),
                          model->nuser_sensor);
   num_trajectory = GetNumberOrDefault(10, model, "ilqg_num_rollouts");

diff --git a/mjpc/tasks/particle/particle.cc b/mjpc/tasks/particle/particle.cc
@@ -15,6 +15,7 @@
 #include "tasks/particle/particle.h"
 
 #include <mujoco/mujoco.h>
+#include "task.h"
 #include "utilities.h"
 
 namespace mjpc {
@@ -63,4 +64,18 @@ void Particle::ResidualTimeVarying(const double* parameters,
   mju_copy(residual + 4, data->ctrl, model->nu);
 }
 
+int Particle::Transition(int state, const mjModel* model, mjData* data,
+                         Task* task) {
+  int new_state = state;
+
+  // some Lissajous curve
+  double goal[2] {0.25 * mju_sin(data->time), 0.25 * mju_cos(data->time / mjPI)};
+
+  // update mocap position
+  data->mocap_pos[0] = goal[0];
+  data->mocap_pos[1] = goal[1];
+
+  return new_state;
+}
+
 }  // namespace mjpc
diff --git a/mjpc/tasks/particle/particle.h b/mjpc/tasks/particle/particle.h
@@ -16,6 +16,7 @@
 #define MJPC_TASKS_PARTICLE_PARTICLE_H_
 
 #include <mujoco/mujoco.h>
+#include "task.h"
 
 namespace mjpc {
 struct Particle {
@@ -31,6 +32,9 @@ static void Residual(const double* parameters, const mjModel* model,
 static void ResidualTimeVarying(const double* parameters, const mjModel* model,
                                 const mjData* data, double* residual);
 
+static int Transition(int state, const mjModel* model, mjData* data,
+                                Task* task);
+
 };
 }  // namespace mjpc
 

diff --git a/mjpc/tasks/particle/particle.xml b/mjpc/tasks/particle/particle.xml
@@ -9,6 +9,9 @@
   </default>
 
   <worldbody>
+    <body name="goal" mocap="true" pos="0.25 0 0.01" quat="1 0 0 0">
+        <geom type="sphere" size=".01" contype="0" conaffinity="0" rgba="0 1 0 .5"/>
+    </body>
     <light name="light" pos="0 0 1"/>
     <camera name="fixed" pos="0 0 .75" quat="1 0 0 0"/>
     <geom name="ground" type="plane" pos="0 0 0" size=".3 .3 .1" material="blue_grid"/>

diff --git a/mjpc/tasks/particle/task_timevarying.xml b/mjpc/tasks/particle/task_timevarying.xml
@@ -5,6 +5,7 @@
   <size memory="10K"/>
 
   <custom>
+    <numeric name="task_transition" data="1" />
     <numeric name="task_risk" data="1" />
     <numeric name="agent_planner" data="0" />
     <numeric name="agent_horizon" data="0.5" />

diff --git a/mjpc/tasks/tasks.cc b/mjpc/tasks/tasks.cc
@@ -69,6 +69,7 @@ const TaskDefinition<const char*> kTasksArray[]{
         .name = "Particle",
         .xml_path = "particle/task_timevarying.xml",
         .residual = &Particle::ResidualTimeVarying,
+        .transition = &Particle::Transition,
     },
     {
         .name = "Quadruped Hill",