
Commit bc7a7e6: "puffer works"
Parent: 20517ee

14 files changed, +295 -246 lines

.vscode/launch.json  (+2 -1)

@@ -19,8 +19,9 @@
     // "sample_factory.aux_loss_coeff=0.1",

     "+framework.device=cpu",
-    "+framework.experiment=baseline.v0.4.1",
+    "framework.experiment=mac.puffer.2",
     "+framework.train_dir=./train_dir",
+    // "framework.wandb.track=True",

     // "sample_factory.wandb_project=mp_rws"
     // "+sample_factory.restart_behavior=restart",

agent/metta_agent.py  (+14 -36)

@@ -26,6 +26,7 @@ def __init__(
     ):
         super().__init__()
         cfg = OmegaConf.create(cfg)
+        self._cfg = cfg
         self._observation_space = obs_space
         self._action_space = action_space

@@ -35,44 +36,34 @@ def __init__(
             cfg.fc.layers,
             cfg.fc.output_dim
         )
-
         self._core = ModelCoreRNN(cfg.core, cfg.fc.output_dim)

         self._decoder = hydra.utils.instantiate(
             cfg.decoder,
             self._core.get_out_size())

-        decoder_out_size: int = self._decoder.get_out_size()
-
-        self._critic_linear = nn.Linear(decoder_out_size, 1)
-        self._action_parameterization = self.get_action_parameterization(decoder_out_size)
-        self._last_action_distribution = None
+        self._critic_linear = nn.Linear(self.decoder_out_size, 1)

         self.apply(self.initialize_weights)

+    @property
+    def decoder_out_size(self):
+        return self._decoder.get_out_size()
+
+    @property
+    def core_out_size(self):
+        return self._cfg.core.rnn_size
+
     def encode_observations(self, td: TensorDict):
-        td["encoded_"] = self._encoder(obs_dict)
-        return x
+        td["encoded_obs"] = self._encoder(td["obs"])

     def forward_core(self, head_output: Tensor, rnn_states):
         x, new_rnn_states = self._core(head_output, rnn_states)
         return x, new_rnn_states

-    def forward_tail(self, core_output, values_only: bool, sample_actions: bool) -> TensorDict:
-        decoder_output = self._decoder(core_output)
-        values = self._critic_linear(decoder_output).squeeze()
-
-        result = TensorDict({"values": values}, batch_size=values.size(0))
-        if values_only:
-            return result
-
-        action_distribution_params, self._last_action_distribution = self._action_parameterization(decoder_output)
-
-        # `action_logits` is not the best name here, better would be "action distribution parameters"
-        result["action_logits"] = action_distribution_params
-
-        self._maybe_sample_actions(sample_actions, result)
-        return result
+    def forward_tail(self, td: TensorDict):
+        td["decoder_output"] = self._decoder(td["core_output"])
+        td["values"] = self._critic_linear(td["decoder_output"]).squeeze()

     def forward(self, normalized_obs_dict, rnn_states, values_only=False) -> TensorDict:
         x = self.encode_observations(normalized_obs_dict)
@@ -98,16 +89,3 @@ def initialize_weights(self, layer):
         # I never noticed much difference between different initialization schemes, and here it seems safer to
         # go with default initialization,
         pass
-
-    def get_action_parameterization(self, decoder_output_size: int):
-        return ActionParameterizationDefault({}, decoder_output_size, self._action_space)
-
-    def _maybe_sample_actions(self, sample_actions: bool, result: TensorDict) -> None:
-        if sample_actions:
-            # for non-trivial action spaces it is faster to do these together
-            actions, result["log_prob_actions"] = sample_actions_log_probs(self._last_action_distribution)
-            assert actions.dim() == 2  # TODO: remove this once we test everything
-            result["actions"] = actions.squeeze(dim=1)
-
-    def action_distribution(self):
-        return self._last_action_distribution
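The net effect of this refactor is that the agent's stages now communicate through a single TensorDict that each method fills in place (`encoded_obs`, `core_output`, `decoder_output`, `values`), while the action parameterization and sampling helpers are dropped from the module. Below is a minimal sketch of that data flow using toy stand-in modules; the shapes, module choices, and single-step GRU call are assumptions for illustration, not the repo's actual encoder/core/decoder.

import torch
import torch.nn as nn
from tensordict import TensorDict

# Toy stand-ins for the configured encoder/core/decoder; the real modules are
# built from this repo's Hydra configs, so all sizes here are illustrative.
encoder = nn.Linear(16, 32)
core = nn.GRU(32, 512, batch_first=True)
decoder = nn.Linear(512, 64)
critic = nn.Linear(64, 1)

td = TensorDict({"obs": torch.randn(8, 16)}, batch_size=[8])

# encode_observations: write the encoded observation back into the same td
td["encoded_obs"] = encoder(td["obs"])

# forward_core: run the recurrent core (a single-step GRU here)
core_out, _ = core(td["encoded_obs"].unsqueeze(1))
td["core_output"] = core_out.squeeze(1)

# forward_tail: decoder output and value estimate are also stored in place
td["decoder_output"] = decoder(td["core_output"])
td["values"] = critic(td["decoder_output"]).squeeze()

print(td["values"].shape)  # torch.Size([8])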

configs/agent/agent.yaml  (-2)

@@ -56,8 +56,6 @@ decoder:


 core:
-  _target_: sample_factory.model.core.ModelCoreRNN
-
   rnn_type: gru
   rnn_num_layers: 1
   rnn_size: 512
(new file)  (+96)

@@ -0,0 +1,96 @@
+# @package framework
+
+_target_: rl_framework.pufferlib.pufferlib.PufferLibFramework
+
+package: metta
+
+
+baseline: False
+render_mode: auto
+vectorization: serial
+eval_mode_path: ""
+mode: train
+backend: clean_pufferl
+experiment: ???
+
+wandb:
+  track: False
+  project: metta
+  entity: platypus
+  group: platypus
+  name: ${framework.experiment}
+
+
+train:
+  exp_id: ${framework.experiment}
+  track: False
+  env: ${env.name}
+  seed: 1
+  torch_deterministic: True
+  cpu_offload: False
+  device: cpu
+  total_timesteps: 10_000_000
+  learning_rate: 2.5e-4
+  num_steps: 128
+  anneal_lr: True
+  gamma: 0.99
+  gae_lambda: 0.95
+  num_minibatches: 4
+  update_epochs: 4
+  norm_adv: True
+  clip_coef: 0.1
+  clip_vloss: True
+  ent_coef: 0.01
+  vf_coef: 0.5
+  max_grad_norm: 0.5
+  target_kl: null
+
+  num_envs: 8
+  num_workers: 8
+  env_batch_size: null
+  zero_copy: True
+  verbose: True
+  data_dir: experiments
+  checkpoint_interval: 200
+  batch_size: 1024
+  minibatch_size: 512
+  bptt_horizon: 16
+  vf_clip_coef: 0.1
+  compile: False
+  compile_mode: reduce-overhead
+
+sweep:
+  method: bayes
+  name: sweep
+  metric:
+    goal: maximize
+    name: environment/episode_return
+  # Nested parameters name required by WandB API
+  parameters:
+    train:
+      parameters:
+        ent_coef: {
+          'distribution': 'log_uniform_values',
+          'min': 1e-3,
+          'max': 5e-2,
+        }
+        gamma: {
+          'values': [0.90, 0.925, 0.95, 0.975, 0.99],
+        }
+        gae_lambda: {
+          'values': [0.90, 0.925, 0.95, 0.975, 0.99],
+        }
+        learning_rate: {
+          'distribution': 'log_uniform_values',
+          'min': 1e-4,
+          'max': 1e-1,
+        }
+        batch_size: {
+          'values': [4096, 8192, 16384, 32768, 65536],
+        }
+        minibatch_size: {
+          'values': [1024, 2048, 4096, 8192, 16384],
+        }
+        bptt_horizon: {
+          'values': [1, 2, 4, 8, 16],
+        }
(new file)  (+4)

@@ -0,0 +1,4 @@
+# @package framework
+
+defaults:
+  - defaults

configs/framework/pufferlib/train/sand.yaml  (+1)

@@ -20,3 +20,4 @@ train:
   minibatch_size: 1024
   compile: False
   anneal_lr: False
+  checkpoint_interval: 10

experiments/sweep_config.yaml  (-152)

This file was deleted.

metta.code-workspace  (+3)

@@ -83,6 +83,9 @@
       "__verbose_abort": "cpp",
       "execution": "cpp"
     },
+    "python.analysis.extraPaths": [
+      "./third_party/pufferlib"
+    ],

   },

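`python.analysis.extraPaths` is a Pylance/Pyright setting, so this change only affects editor import resolution: it lets the language server resolve `pufferlib` imports from what appears to be a vendored checkout under `third_party/pufferlib`, without requiring the package to be installed in the active environment.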
rl_framework/pufferlib/__init__.py  (-8)

This file was deleted.

rl_framework/pufferlib/clean_pufferl.py  (+1)

@@ -506,6 +506,7 @@ def run(self):
             else:
                 self.gpu_util.append(1)
                 self.gpu_mem.append(1)
+
             time.sleep(self.delay)

     def stop(self):
