Skip to content

Commit

Permalink
fix bug, have to use raw_model not model to access the loss
Browse files (browse the repository at this point in the history)
  • Loading branch information
karpathy committed Aug 6, 2023
1 parent 65b0846 commit 623894f
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions train.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -212,7 +212,7 @@ def estimate_loss():
X, Y = next(batch_iter)
with ctx:
logits = model(X, Y)
- loss = model.last_loss
+ loss = raw_model.last_loss
losses[k] = loss.item()
out[split] = losses.mean()
model.train()
Expand Down Expand Up @@ -296,7 +296,7 @@ def get_lr(it):
model.require_backward_grad_sync = micro_step == gradient_accumulation_steps - 1
with ctx:
logits = model(X, Y)
- loss = model.last_loss
+ loss = raw_model.last_loss
loss = loss / gradient_accumulation_steps
# immediately async prefetch next batch while model is doing the forward pass on the GPU
X, Y = next(train_batch_iter)
Expand Down

0 comments on commit 623894f

Please sign in to comment.