fix: all envs now construct entities correctly

epignatelli · Jun 7, 2024 · db4310d
1 parent dff090b
commit db4310d
Show file tree

Hide file tree

Showing 4 changed files with 10 additions and 19 deletions.
diff --git a/baselines/ppo.py b/baselines/ppo.py
@@ -38,26 +38,17 @@ class Args:
         wandb.init(project=args.project_name, config=config)
 
         # init environment
-        env = FlattenObsWrapper(nx.make(env_id))
+        env = nx.make(env_id)
+        env = FlattenObsWrapper(env)
 
         # create agent
-        network = nn.Sequential(
-            [
-                nn.Dense(
-                    64, kernel_init=orthogonal(np.sqrt(2)), bias_init=constant(0.0)
-                ),
-                nn.tanh,
-                nn.Dense(
-                    64, kernel_init=orthogonal(np.sqrt(2)), bias_init=constant(0.0)
-                ),
-                nn.tanh,
-            ]
-        )
         agent = PPO(
             hparams=args.ppo,
             network=ActorCritic(action_dim=len(env.action_set)),
             env=env,
         )
+
+        # run experiment
         experiment = nx.Experiment(
             name=args.project_name,
             agent=agent,

diff --git a/navix/agents/ppo.py b/navix/agents/ppo.py
@@ -22,7 +22,7 @@
 from navix.environments.environment import Timestep
 from navix.states import State
 
-from .models import ActorCriticRNN
+from .models import ActorCritic
 
 
 @dataclass
@@ -87,7 +87,7 @@ class TrainingState(TrainState):
 
 class PPO(Agent):
     hparams: PPOHparams = struct.field(pytree_node=False)
-    network: ActorCriticRNN = struct.field(pytree_node=False)
+    network: ActorCritic = struct.field(pytree_node=False)
     env: Environment
 
     def collect_experience(

diff --git a/navix/environments/dynamic_obstacles.py b/navix/environments/dynamic_obstacles.py
@@ -63,7 +63,7 @@ def _reset(self, key: Array, cache: Union[RenderingCache, None] = None) -> Times
         )
         # goal
         goal_pos = jnp.asarray([self.height - 2, self.width - 2])
-        goal = Goal(position=goal_pos, probability=jnp.asarray(1.0))
+        goal = Goal.create(position=goal_pos, probability=jnp.asarray(1.0))
 
         # balls
         exclude = jnp.stack([player_pos, goal_pos])

diff --git a/navix/environments/lava_gap.py b/navix/environments/lava_gap.py
@@ -94,7 +94,7 @@ def _reset(self, key: Array, cache: Union[RenderingCache, None] = None) -> Times
 
 
 register_env(
-    "Navix-DoorKey-S5-v0",
+    "Navix-LavaGap-S5-v0",
     lambda *args, **kwargs: LavaGap.create(
         *args,
         **kwargs,
@@ -106,7 +106,7 @@ def _reset(self, key: Array, cache: Union[RenderingCache, None] = None) -> Times
     ),
 )
 register_env(
-    "Navix-DoorKey-S6-v0",
+    "Navix-LavaGap-S6-v0",
     lambda *args, **kwargs: LavaGap.create(
         *args,
         **kwargs,
@@ -118,7 +118,7 @@ def _reset(self, key: Array, cache: Union[RenderingCache, None] = None) -> Times
     ),
 )
 register_env(
-    "Navix-DoorKey-S7-v0",
+    "Navix-LavaGap-S7-v0",
     lambda *args, **kwargs: LavaGap.create(
         *args,
         **kwargs,