From adf5830be6ad6a5ed49cab6454e55ef6a508edd4 Mon Sep 17 00:00:00 2001 From: pythonLoader Date: Mon, 22 Jul 2024 12:51:45 -0400 Subject: [PATCH 1/6] loading models --- utils/load_and_analyse_model.ipynb | 140 +++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 utils/load_and_analyse_model.ipynb diff --git a/utils/load_and_analyse_model.ipynb b/utils/load_and_analyse_model.ipynb new file mode 100644 index 000000000..1960c863a --- /dev/null +++ b/utils/load_and_analyse_model.ipynb @@ -0,0 +1,140 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Installation\n", + "```\n", + "git clone git@github.com:vllm-project/llm-compressor.git\\\n", + "cd llm-compressor\\\n", + "micromamba create -n weight-analyzer python=3.11\\\n", + "pip install -e .\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f4f98bcaad2644f9b6f9b73193c59d35", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "config.json: 0%| | 0.00/654 [00:00 Date: Mon, 22 Jul 2024 13:21:24 -0400 Subject: [PATCH 2/6] loading models --- utils/load_and_analyse_model.ipynb | 98 +++++++++++++++++++++++++----- 1 file changed, 84 insertions(+), 14 deletions(-) diff --git a/utils/load_and_analyse_model.ipynb b/utils/load_and_analyse_model.ipynb index 1960c863a..0caf0aa8d 100644 --- a/utils/load_and_analyse_model.ipynb +++ b/utils/load_and_analyse_model.ipynb @@ -16,18 +16,18 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f4f98bcaad2644f9b6f9b73193c59d35", + "model_id": "e39888dd017a44e3a04ea1004ae2991d", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "config.json: 0%| | 0.00/654 [00:00 Date: Mon, 22 Jul 2024 14:01:33 -0400 Subject: [PATCH 3/6] Safetensors loading --- utils/load_and_analyse_model.ipynb | 178 ++++++++++++----------------- 1 file changed, 73 insertions(+), 105 deletions(-) diff --git a/utils/load_and_analyse_model.ipynb b/utils/load_and_analyse_model.ipynb index 0caf0aa8d..eaa4da6ed 100644 --- a/utils/load_and_analyse_model.ipynb +++ b/utils/load_and_analyse_model.ipynb @@ -16,92 +16,36 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e39888dd017a44e3a04ea1004ae2991d", + "model_id": "988471c2e2aa4c89b73b7069b2d43f9d", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "README.md: 0%| | 0.00/36.5k [00:00 Date: Mon, 22 Jul 2024 16:59:09 -0400 Subject: [PATCH 4/6] Weight analysis and visualization for llama3 models loaded from safetensors --- utils/analyse_model_by_safetensors.py | 129 +++++++++++++++++++ utils/load_and_analyse_model.ipynb | 178 -------------------------- 2 files changed, 129 insertions(+), 178 deletions(-) create mode 100644 utils/analyse_model_by_safetensors.py delete mode 100644 utils/load_and_analyse_model.ipynb diff --git a/utils/analyse_model_by_safetensors.py b/utils/analyse_model_by_safetensors.py new file mode 100644 index 000000000..1eb6d905b --- /dev/null +++ b/utils/analyse_model_by_safetensors.py @@ -0,0 +1,129 @@ +import transformers +import torch +import torch.nn as nn +from transformers import AutoModelForCausalLM, AutoTokenizer +from llmcompressor.transformers import SparseAutoModelForCausalLM +from 
scipy.stats import skew, kurtosis
+import matplotlib.pyplot as plt
+import numpy as np
+
+import safetensors
+from safetensors import safe_open
+import os
+import json
+from tqdm import tqdm
+
+
+def get_stats_of_layer(tensors):
+    """Summary statistics (min/max/mean/median/std) for each tensor."""
+    stats_layer = {}
+    for linear_ in tqdm(tensors):
+        stats_layer[linear_] = {}
+        stats_layer[linear_]["min"] = torch.min(tensors[linear_]).item()
+        stats_layer[linear_]["max"] = torch.max(tensors[linear_]).item()
+        stats_layer[linear_]["mean"] = torch.mean(tensors[linear_]).item()
+        stats_layer[linear_]["median"] = torch.median(tensors[linear_]).item()
+        stats_layer[linear_]["std"] = torch.std(tensors[linear_]).item()
+        # kurtosis via scipy needs a flattened float16 numpy copy of every
+        # tensor, which is slow at 70B scale, so it is skipped for now:
+        # float16_tensor = tensors[linear_].to(torch.float16).cpu().numpy().flatten()
+        # stats_layer[linear_]["kurtosis"] = kurtosis(float16_tensor)
+
+    return stats_layer
+
+
+def store_histograms(tensors, layer, model_path, log=True):
+    """Histogram of every linear operator of one decoder layer (2x4 grid)."""
+    fig, axs = plt.subplots(2, 4, figsize=(20, 10))
+    fig.suptitle(f"Histogram of Linear Operators of Layer {layer}")
+    tensor_keys = sorted(list(tensors.keys()))
+    for i, linear_ in enumerate(tensor_keys):
+        tensor = tensors[linear_].to(torch.float16).cpu().numpy().flatten()
+        axs[i//4, i%4].hist(tensor, bins=100, log=log)
+        axs[i//4, i%4].set_title(linear_)
+
+    # Create the output directory on first use so savefig cannot fail.
+    os.makedirs(f"{model_path}/histograms", exist_ok=True)
+    plt.savefig(f"{model_path}/histograms/histogram_layer_{layer}.png", dpi=300)
+    plt.close()
+
+if __name__ == "__main__":
+
+    model_id = "meta-llama/Meta-Llama-3-70B"
+    weight_path = "/nm/drive0/shashata/weight-analysis/dense_llama_3_70B"
+    cache_dir = "/nm/drive0/shashata/weight-analysis"
+    presaved_path = f"{cache_dir}/models--{model_id.replace('/', '--')}"
+
+    if not os.path.exists(presaved_path):
+        # os.makedirs(presaved_path)
+        model = SparseAutoModelForCausalLM.from_pretrained(
+            model_id,
+            device_map='auto',
+            torch_dtype='auto',
+            cache_dir=cache_dir
+        )
+        model.save_pretrained(weight_path)
+
+
+
+    linear_operators = ['mlp.gate_proj', 'mlp.down_proj', 'mlp.up_proj', 'self_attn.k_proj', 'self_attn.v_proj', 'self_attn.q_proj', 'self_attn.o_proj']
+    layer_index_file = f"{weight_path}/model.safetensors.index.json"
+
+    # Load the shard index: it maps each tensor name to its shard file.
+    with open(layer_index_file, "r") as f:
+        layer_index = json.load(f)['weight_map']
+    layer_keys = list(layer_index.keys())
+
+    # find the max layer number
+    max_layer = max([int(x.split('.')[2]) for x in layer_keys if 'layers' in x])
+    print(max_layer)
+
+    min_layer = 0
+
+    stats = {}
+    for layer in range(min_layer, max_layer+1):
+        # if layer != 1:
+        #     continue
+
+        print(f"Layer {layer}")
+
+        # get the layer keys for layer_index
+        # layer_keys = [x for x in layer_index.keys() if f"layers.{layer}.self_attn" in x]
+        layer_files = []
+        layer_opearators = []
+        layer_tensors = {}
+
+        # Collect this layer's linear-operator tensor names ...
+        for op in linear_operators:
+            layer_opearators.extend(x for x in layer_keys if f"layers.{layer}.{op}" in x)
+
+        # ... and the deduplicated set of shard files that hold them.
+        for lo in layer_opearators:
+            if layer_index[lo] not in layer_files:
+                layer_files.append(layer_index[lo])
+        # print(list(layer_files))
+        print(layer_files)
+        print(layer_opearators)
+        if len(layer_files) == 1:
+            with safe_open(f"{weight_path}/{layer_files[0]}", framework='pt', device='cpu') as f:
+                for k in layer_opearators:
+                    layer_tensors[k] = f.get_tensor(k)
+        elif len(layer_files) > 1:
+            # A layer can straddle shards; read each shard only for the
+            # tensors it actually contains.
+            for lf in layer_files:
+                with safe_open(f"{weight_path}/{lf}", framework='pt', device='cpu') as f:
+                    for k in layer_opearators:
+                        if k in f.keys():
+                            layer_tensors[k] = f.get_tensor(k)
+
+
+        for k in layer_tensors.keys():
+            print(k, layer_tensors[k].shape)
+
+        stats.update(get_stats_of_layer(layer_tensors))
+        print(stats)
+        # print(stats[layer])
+
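+        # Dump this layer's weight histograms to disk (log-scaled y-axis
+        # keeps the near-zero bins from swamping the plot):
+        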
store_histograms(layer_tensors, layer, weight_path, log=True) + # break + + # if layer > 1: + # break + + # save the stats using json + with open(f"{weight_path}/model_stats.json", "w") as f: + json.dump(stats, f) diff --git a/utils/load_and_analyse_model.ipynb b/utils/load_and_analyse_model.ipynb deleted file mode 100644 index eaa4da6ed..000000000 --- a/utils/load_and_analyse_model.ipynb +++ /dev/null @@ -1,178 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Installation\n", - "```\n", - "git clone git@github.com:vllm-project/llm-compressor.git\\\n", - "cd llm-compressor\\\n", - "micromamba create -n weight-analyzer python=3.11\\\n", - "pip install -e .\n", - "\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "988471c2e2aa4c89b73b7069b2d43f9d", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Loading checkpoint shards: 0%| | 0/4 [00:00 Date: Mon, 22 Jul 2024 17:13:20 -0400 Subject: [PATCH 5/6] Cleaning up the code and skipping kurtosis --- utils/analyse_model_by_safetensors.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/utils/analyse_model_by_safetensors.py b/utils/analyse_model_by_safetensors.py index 1eb6d905b..a6fd579b4 100644 --- a/utils/analyse_model_by_safetensors.py +++ b/utils/analyse_model_by_safetensors.py @@ -6,7 +6,6 @@ from scipy.stats import skew, kurtosis import matplotlib.pyplot as plt import numpy as np - import safetensors from safetensors import safe_open import os @@ -61,8 +60,6 @@ def store_histograms(tensors, layer, model_path, log=True): ) model.save_pretrained(weight_path) - - linear_operators = ['mlp.gate_proj', 'mlp.down_proj', 'mlp.up_proj', 'self_attn.k_proj', 'self_attn.v_proj', 'self_attn.q_proj', 'self_attn.o_proj'] layer_index_file = f"{weight_path}/model.safetensors.index.json" @@ -73,19 +70,14 @@ def store_histograms(tensors, layer, model_path, log=True): # find the max layer number max_layer = max([int(x.split('.')[2]) for x in layer_keys if 'layers' in x]) - print(max_layer) + print("Total Layers ->", max_layer+1) min_layer = 0 stats = {} + print("Starting to work with layers") for layer in range(min_layer, max_layer+1): - # if layer != 1: - # continue - print(f"Layer {layer}") - - # get the layer keys for layer_index - # layer_keys = [x for x in layer_index.keys() if f"layers.{layer}.self_attn" in x] layer_files = [] layer_opearators = [] layer_tensors = {} @@ -96,7 +88,7 @@ def store_histograms(tensors, layer, model_path, log=True): for lo in layer_opearators: if layer_index[lo] not in layer_files: layer_files.append(layer_index[lo]) - # print(list(layer_files)) + print(layer_files) print(layer_opearators) if len(layer_files) == 1: @@ -114,16 +106,14 @@ def store_histograms(tensors, layer, model_path, log=True): for k in layer_tensors.keys(): print(k, layer_tensors[k].shape) - stats.update(get_stats_of_layer(layer_tensors)) - print(stats) + layer_stats = get_stats_of_layer(layer_tensors) + stats.update(layer_stats) + print(layer_stats) + # print(stats) # print(stats[layer]) store_histograms(layer_tensors, layer, weight_path, log=True) - # break - # if layer > 1: - # break - # save the stats using json with open(f"{weight_path}/model_stats.json", "w") as f: json.dump(stats, f) From 4e55a1dda19b3db72054fcab562a75f94b76da2b Mon Sep 17 00:00:00 2001 From: pythonLoader Date: Mon, 22 Jul 2024 17:16:56 -0400 
Subject: [PATCH 6/6] code clean up --- utils/analyse_model_by_safetensors.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/analyse_model_by_safetensors.py b/utils/analyse_model_by_safetensors.py index a6fd579b4..34aa86b7b 100644 --- a/utils/analyse_model_by_safetensors.py +++ b/utils/analyse_model_by_safetensors.py @@ -45,11 +45,11 @@ def store_histograms(tensors, layer, model_path, log=True): if __name__ == "__main__": - model_id = "meta-llama/Meta-Llama-3-70B" - weight_path = "/nm/drive0/shashata/weight-analysis/dense_llama_3_70B" + model_size = "8B" + model_id = f"meta-llama/Meta-Llama-3-{model_size}" + weight_path = f"/nm/drive0/shashata/weight-analysis/dense_llama_3_{model_size}" cache_dir = "/nm/drive0/shashata/weight-analysis" presaved_path = f"{cache_dir}/models--{model_id.replace('/', '--')}" - if not os.path.exists(presaved_path): # os.makedirs(presaved_path) model = SparseAutoModelForCausalLM.from_pretrained(
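A few usage sketches for the analysis script above. None of the following code is part of the patch series; paths and tensor names are placeholders. First, a spot check does not require running the script's full layer loop: a sharded safetensors checkpoint can be opened lazily, one tensor at a time, assuming the same on-disk layout the script uses (shards plus a model.safetensors.index.json weight map).

```python
import json
from safetensors import safe_open

weight_path = "/path/to/dense_llama_3_8B"  # placeholder

# The index maps each tensor name to the shard file that stores it.
with open(f"{weight_path}/model.safetensors.index.json") as f:
    weight_map = json.load(f)["weight_map"]

name = "model.layers.0.self_attn.q_proj.weight"  # example key
shard = weight_map[name]

# safe_open reads lazily, so only the requested tensor is materialized.
with safe_open(f"{weight_path}/{shard}", framework="pt", device="cpu") as f:
    tensor = f.get_tensor(name)

print(name, tuple(tensor.shape), tensor.dtype)
```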
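Patch 5 skips kurtosis because the scipy route forces a flattened float16 numpy copy of every weight tensor. If the statistic is wanted later, it can be computed without leaving torch; this is a sketch of the standard Fisher (excess) kurtosis definition, and torch_kurtosis is a hypothetical helper name, not code from the patches.

```python
import torch

def torch_kurtosis(t: torch.Tensor) -> float:
    """Fisher (excess) kurtosis, computed in float32 without a numpy copy."""
    x = t.to(torch.float32).flatten()
    mean = x.mean()
    # Population std (ddof=0), matching scipy.stats.kurtosis's default.
    std = x.std(unbiased=False)
    # Fourth standardized moment, minus 3 so a Gaussian scores 0.
    return (((x - mean) ** 4).mean() / std.pow(4)).item() - 3.0
```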
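Finally, the model_stats.json the script writes is a flat dict mapping each tensor name to its min/max/mean/median/std, so downstream analysis needs no torch at all. A small sketch (the path is again a placeholder) that ranks tensors by spread, which is often a useful first look before choosing a quantization scheme:

```python
import json

weight_path = "/path/to/dense_llama_3_8B"  # placeholder

with open(f"{weight_path}/model_stats.json") as f:
    stats = json.load(f)

# Rank tensors by standard deviation; unusually wide weight distributions
# are the usual suspects when low-bit quantization loses accuracy.
by_std = sorted(stats.items(), key=lambda kv: kv[1]["std"], reverse=True)

for name, s in by_std[:10]:
    print(f"{name}: std={s['std']:.4f} "
          f"range=[{s['min']:.4f}, {s['max']:.4f}] mean={s['mean']:.4f}")
```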