@@ -516,7 +516,7 @@ def append_transform(
516
516
self ,
517
517
transform : "Transform" # noqa: F821
518
518
| Callable [[TensorDictBase ], TensorDictBase ],
519
- ) -> None :
519
+ ) -> EnvBase :
520
520
"""Returns a transformed environment where the callable/transform passed is applied.
521
521
522
522
Args:
@@ -1482,7 +1482,8 @@ def full_state_spec(self, spec: Composite) -> None:
1482
1482
1483
1483
# Single-env specs can be used to remove the batch size from the spec
1484
1484
@property
1485
- def batch_dims (self ):
1485
+ def batch_dims (self ) -> int :
1486
+ """Number of batch dimensions of the env."""
1486
1487
return len (self .batch_size )
1487
1488
1488
1489
def _make_single_env_spec (self , spec : TensorSpec ) -> TensorSpec :
@@ -2444,11 +2445,11 @@ def rollout(
2444
2445
set_truncated : bool = False ,
2445
2446
out = None ,
2446
2447
trust_policy : bool = False ,
2447
- ):
2448
+ ) -> TensorDictBase :
2448
2449
"""Executes a rollout in the environment.
2449
2450
2450
- The function will stop as soon as one of the contained environments
2451
- returns done=True .
2451
+ The function will return as soon as any of the contained environments
2452
+ reaches any of the done states.
2452
2453
2453
2454
Args:
2454
2455
max_steps (int): maximum number of steps to be executed. The actual number of steps can be smaller if
@@ -2464,14 +2465,16 @@ def rollout(
2464
2465
the call to ``rollout``.
2465
2466
2466
2467
Keyword Args:
2467
- auto_reset (bool, optional): if ``True``, resets automatically the environment
2468
- if it is in a done state when the rollout is initiated.
2469
- Default is ``True``.
2468
+ auto_reset (bool, optional): if ``True``, the contained environments will be reset before starting the
2469
+ rollout. If ``False``, then the rollout will continue from a previous state, which requires the
2470
+ ``tensordict`` argument to be passed with the previous rollout. Default is ``True``.
2470
2471
auto_cast_to_device (bool, optional): if ``True``, the device of the tensordict is automatically cast to the
2471
2472
policy device before the policy is used. Default is ``False``.
2472
- break_when_any_done (bool): breaks if any of the done state is True. If False, a reset() is
2473
- called on the sub-envs that are done. Default is True.
2474
- break_when_all_done (bool): TODO
2473
+ break_when_any_done (bool): if ``True``, break when any of the contained environments reaches any of the
2474
+ done states. If ``False``, then the done environments are reset automatically. Default is ``True``.
2475
+ break_when_all_done (bool, optional): if ``True``, break if all of the contained environments reach any
2476
+ of the done states. If ``False``, break if at least one environment reaches any of the done states.
2477
+ Default is ``False``.
2475
2478
return_contiguous (bool): if ``False``, a ``LazyStackedTensorDict`` will be returned. Default is ``True``.
2476
2479
tensordict (TensorDict, optional): if ``auto_reset`` is False, an initial
2477
2480
tensordict must be provided. Rollout will check if this tensordict has done flags and reset the
0 commit comments