From 170756214c8f0587aec50e001b25d3858d323ea1 Mon Sep 17 00:00:00 2001
From: Kimish Patel
Date: Tue, 16 Apr 2024 12:20:29 -0700
Subject: [PATCH] Fix model export for 7b (#222)

Summary:
- `n_layer` -> `n_layers`
- `str(checkpoint_path)`
- `strict=False`

Test Plan:
```
python generate.py --compile --checkpoint-path="/home/kimishpatel/models/llama2/7b/consolidated.00.pth" --params-path="/home/kimishpatel/models/llama2/7b/params_32k_vocab.json" --prompt "Hello, my name is" --device cpu
```
---
 build/builder.py     |  2 +-
 build/gguf_loader.py |  2 +-
 build/model.py       | 26 +++++++++++++-------------
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/build/builder.py b/build/builder.py
index 247584012..f558989b7 100644
--- a/build/builder.py
+++ b/build/builder.py
@@ -216,7 +216,7 @@ def _load_model_not_gguf(builder_args):
     if "model" in checkpoint and "stories" in str(builder_args.checkpoint_path):
         checkpoint = checkpoint["model"]

-    model.load_state_dict(checkpoint, assign=True)
+    model.load_state_dict(checkpoint, assign=True, strict=False)

     if builder_args.use_tp:
         from tp import apply_tp
diff --git a/build/gguf_loader.py b/build/gguf_loader.py
index 3655e804a..464f4536a 100644
--- a/build/gguf_loader.py
+++ b/build/gguf_loader.py
@@ -72,7 +72,7 @@ def _create_pt_model(
 ) -> nn.Module:
     llama_model_args = ModelArgs(
         dim=gguf_model_args.embedding_length,
-        n_layer=gguf_model_args.block_count,
+        n_layers=gguf_model_args.block_count,
         n_heads=gguf_model_args.attention.head_count,
         n_local_heads=gguf_model_args.attention.head_count_kv,
         vocab_size=gguf_model_args.vocab_size,
diff --git a/build/model.py b/build/model.py
index 9e3cc1e71..655405d9f 100644
--- a/build/model.py
+++ b/build/model.py
@@ -25,7 +25,7 @@ def find_multiple(n: int, k: int) -> int:
 class ModelArgs:
     block_size: int = 2048
     vocab_size: int = 32000
-    n_layer: int = 32
+    n_layers: int = 32
     # n_head in gpt-fast
     n_heads: int = 32
     dim: int = 4096
@@ -96,13 +96,13 @@ def from_name(cls, name: str):

 transformer_configs = {
     "CodeLlama-7b-Python-hf": dict(
-        block_size=16384, vocab_size=32000, n_layer=32, dim=4096, rope_base=1000000
+        block_size=16384, vocab_size=32000, n_layers=32, dim=4096, rope_base=1000000
     ),
-    "7B": dict(n_layer=32, n_heads=32, dim=4096),
-    "13B": dict(n_layer=40, n_heads=40, dim=5120),
-    "30B": dict(n_layer=60, n_heads=52, dim=6656),
+    "7B": dict(n_layers=32, n_heads=32, dim=4096),
+    "13B": dict(n_layers=40, n_heads=40, dim=5120),
+    "30B": dict(n_layers=60, n_heads=52, dim=6656),
     "34B": dict(
-        n_layer=48,
+        n_layers=48,
         n_heads=64,
         dim=8192,
         vocab_size=32000,
@@ -110,9 +110,9 @@ def from_name(cls, name: str):
         hidden_dim=22016,
         rope_base=1000000,
     ),  # CodeLlama-34B-Python-hf
-    "70B": dict(n_layer=80, n_heads=64, dim=8192, n_local_heads=8, hidden_dim=28672),
+    "70B": dict(n_layers=80, n_heads=64, dim=8192, n_local_heads=8, hidden_dim=28672),
     "Mistral-7B": dict(
-        n_layer=32,
+        n_layers=32,
         n_heads=32,
         n_local_heads=8,
         dim=4096,
@@ -120,7 +120,7 @@ def from_name(cls, name: str):
         vocab_size=32000,
     ),
     "Mistral-7B-Instruct-v0.1": dict(
-        n_layer=32,
+        n_layers=32,
         n_heads=32,
         n_local_heads=8,
         dim=4096,
@@ -128,15 +128,15 @@ def from_name(cls, name: str):
         hidden_dim=14336,
         vocab_size=32000,
     ),
     "Mistral-7B-Instruct-v0.2": dict(
-        n_layer=32,
+        n_layers=32,
         n_heads=32,
         n_local_heads=8,
         dim=4096,
         hidden_dim=14336,
         vocab_size=32000,
     ),
-    "stories15M": dict(n_layer=6, n_heads=6, dim=288),
-    "stories110M": dict(n_layer=12, n_heads=12, dim=768),
+    "stories15M": dict(n_layers=6, n_heads=6, dim=288),
+    "stories110M": dict(n_layers=12, n_heads=12, dim=768),
 }

@@ -169,7 +169,7 @@ def __init__(self, config: ModelArgs) -> None:
         self.tok_embeddings = nn.Embedding(config.vocab_size, config.dim)

         self.layers = nn.ModuleList(
-            TransformerBlock(config) for _ in range(config.n_layer)
+            TransformerBlock(config) for _ in range(config.n_layers)
         )
         self.norm = RMSNorm(config.dim, eps=config.norm_eps)
         self.output = nn.Linear(config.dim, config.vocab_size, bias=False)
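
Note (not part of the patch): a minimal sketch of what the renamed field and the relaxed loading look like from the caller's side. It assumes the `ModelArgs`/`Transformer` classes in `build/model.py` are importable from the repo root, and the checkpoint path below is a placeholder. `load_state_dict(..., strict=False)` returns the keys it skipped, so a mismatch stays visible instead of raising.

```python
# Hypothetical sanity check, not part of the patch; the checkpoint path is a placeholder.
import torch

from build.model import ModelArgs, Transformer

# ModelArgs now takes n_layers (plural), mirroring the "7B" entry in transformer_configs.
model = Transformer(ModelArgs(n_layers=32, n_heads=32, dim=4096, vocab_size=32000))

# Load the raw Llama-2 checkpoint on CPU without copying it into memory up front.
checkpoint = torch.load("consolidated.00.pth", map_location="cpu", mmap=True)

# With strict=False, keys present on only one side no longer raise; the returned
# NamedTuple reports what was skipped so the mismatch is still visible.
result = model.load_state_dict(checkpoint, assign=True, strict=False)
print("missing keys:   ", result.missing_keys)
print("unexpected keys:", result.unexpected_keys)
```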