Skip to content

Commit 2c5f78d

Browse files
committed
chore: fixed configs
1 parent 324c436 commit 2c5f78d

File tree

3 files changed

+31
-19
lines changed

3 files changed

+31
-19
lines changed

tutorials/getting_started/configs/example_config.yaml

Lines changed: 11 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -235,25 +235,22 @@ model_raw:
235235
seq_length_dim: -2
236236
base_freq: 10000
237237
attention_implementation: manual
238-
activation_type: gelu
238+
activation_type: swiglu
239239
attention_norm_config:
240-
norm_type: rms_norm
240+
norm_type: layer_norm
241241
config:
242-
ndim: ${model_raw.config.n_embd}
243-
bias: true
244-
epsilon: 1e-5
242+
normalized_shape: ${model_raw.config.n_embd}
243+
eps: 1.0e-05
245244
ffn_norm_config:
246-
norm_type: rms_norm
245+
norm_type: layer_norm
247246
config:
248-
ndim: ${model_raw.config.n_embd}
249-
bias: true
250-
epsilon: 1e-5
247+
normalized_shape: ${model_raw.config.n_embd}
248+
eps: 1.0e-05
251249
lm_head_norm_config:
252-
norm_type: rms_norm
250+
norm_type: layer_norm
253251
config:
254-
ndim: ${model_raw.config.n_embd}
255-
bias: true
256-
epsilon: 1e-5
252+
normalized_shape: ${model_raw.config.n_embd}
253+
eps: 1.0e-05
257254
use_weight_tying: true
258255

259256
lr_scheduler:
@@ -281,7 +278,7 @@ optimizer:
281278

282279
gradient_clipper:
283280
component_key: gradient_clipper
284-
variant_key: fsdp
281+
variant_key: fsdp1
285282
config:
286283
wrapped_model:
287284
instance_key: wrapped_model

tutorials/warmstart/configs/pre_training_config.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -156,6 +156,7 @@ model_raw:
156156
variant_key: gpt2
157157
config:
158158
use_meta_device: true
159+
use_weight_tying: true
159160
sample_key: ${settings.referencing_keys.sample_key}
160161
poe_type: NOPE
161162
sequence_length: ${settings.step_profile.sequence_length}
@@ -196,7 +197,6 @@ model_raw:
196197
ndim: ${model_raw.config.n_embd}
197198
bias: true
198199
epsilon: 1e-5
199-
use_weight_tying: true
200200

201201
lr_scheduler:
202202
component_key: scheduler
@@ -228,7 +228,7 @@ optimizer:
228228

229229
gradient_clipper:
230230
component_key: gradient_clipper
231-
variant_key: fsdp
231+
variant_key: fsdp1
232232
config:
233233
wrapped_model:
234234
instance_key: wrapped_model

tutorials/warmstart/configs/warmstart_config.yaml

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,7 @@ eval_dataloaders: []
104104

105105
checkpoint_loading:
106106
component_key: checkpoint_loading
107-
variant_key: fsdp
107+
variant_key: fsdp1
108108
config:
109109
global_rank: ${settings.cuda_env.global_rank}
110110
block_names: [GPT2Block]
@@ -135,9 +135,23 @@ loss_fn:
135135
target_key: ${settings.referencing_keys.target_key}
136136
prediction_key: ${settings.referencing_keys.prediction_key}
137137

138+
app_state:
139+
component_key: app_state
140+
variant_key: raw
141+
config:
142+
model:
143+
instance_key: wrapped_model
144+
pass_type: BY_REFERENCE
145+
optimizer:
146+
instance_key: optimizer
147+
pass_type: BY_REFERENCE
148+
lr_scheduler:
149+
instance_key: lr_scheduler
150+
pass_type: BY_REFERENCE
151+
138152
wrapped_model:
139153
component_key: model
140-
variant_key: checkpointed
154+
variant_key: fsdp1_checkpointed
141155
config:
142156
model:
143157
instance_key: model
@@ -169,6 +183,7 @@ model_raw:
169183
variant_key: gpt2
170184
config:
171185
use_meta_device: false
186+
use_weight_tying: true
172187
sample_key: ${settings.referencing_keys.sample_key}
173188
poe_type: NOPE
174189
sequence_length: ${settings.step_profile.sequence_length}
@@ -255,7 +270,7 @@ optimizer_original:
255270

256271
gradient_clipper:
257272
component_key: gradient_clipper
258-
variant_key: fsdp
273+
variant_key: fsdp1
259274
config:
260275
wrapped_model:
261276
instance_key: wrapped_model

0 commit comments

Comments
 (0)