From 623894f5dafefc80e2c6ae93e7f20fabbc9273e4 Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Sun, 6 Aug 2023 07:55:46 +0000
Subject: [PATCH] fix bug, have to use raw_model not model to access the loss

---
 train.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/train.py b/train.py
index 811dd8ac..dbf0b240 100644
--- a/train.py
+++ b/train.py
@@ -212,7 +212,7 @@ def estimate_loss():
             X, Y = next(batch_iter)
             with ctx:
                 logits = model(X, Y)
-                loss = model.last_loss
+                loss = raw_model.last_loss
             losses[k] = loss.item()
         out[split] = losses.mean()
     model.train()
@@ -296,7 +296,7 @@ def get_lr(it):
             model.require_backward_grad_sync = micro_step == gradient_accumulation_steps - 1
         with ctx:
             logits = model(X, Y)
-            loss = model.last_loss
+            loss = raw_model.last_loss
             loss = loss / gradient_accumulation_steps
         # immediately async prefetch next batch while model is doing the forward pass on the GPU
         X, Y = next(train_batch_iter)