From 623894f5dafefc80e2c6ae93e7f20fabbc9273e4 Mon Sep 17 00:00:00 2001
From: Andrej Karpathy <andrej.karpathy@gmail.com>
Date: Sun, 6 Aug 2023 07:55:46 +0000
Subject: [PATCH] fix bug: must use raw_model, not model, to access the loss

---
 train.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/train.py b/train.py
index 811dd8ac..dbf0b240 100644
--- a/train.py
+++ b/train.py
@@ -212,7 +212,7 @@ def estimate_loss():
             X, Y = next(batch_iter)
             with ctx:
                 logits = model(X, Y)
-                loss = model.last_loss
+                loss = raw_model.last_loss
             losses[k] = loss.item()
         out[split] = losses.mean()
     model.train()
@@ -296,7 +296,7 @@ def get_lr(it):
             model.require_backward_grad_sync = micro_step == gradient_accumulation_steps - 1
         with ctx:
             logits = model(X, Y)
-            loss = model.last_loss
+            loss = raw_model.last_loss
             loss = loss / gradient_accumulation_steps
         # immediately async prefetch next batch while model is doing the forward pass on the GPU
         X, Y = next(train_batch_iter)