From adf5830be6ad6a5ed49cab6454e55ef6a508edd4 Mon Sep 17 00:00:00 2001 From: pythonLoader Date: Mon, 22 Jul 2024 12:51:45 -0400 Subject: [PATCH 1/6] loading models --- utils/load_and_analyse_model.ipynb | 140 +++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 utils/load_and_analyse_model.ipynb diff --git a/utils/load_and_analyse_model.ipynb b/utils/load_and_analyse_model.ipynb new file mode 100644 index 000000000..1960c863a --- /dev/null +++ b/utils/load_and_analyse_model.ipynb @@ -0,0 +1,140 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Installation\n", + "```\n", + "git clone git@github.com:vllm-project/llm-compressor.git\\\n", + "cd llm-compressor\\\n", + "micromamba create -n weight-analyzer python=3.11\\\n", + "pip install -e .\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f4f98bcaad2644f9b6f9b73193c59d35", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "config.json: 0%| | 0.00/654 [00:00 Date: Mon, 22 Jul 2024 13:21:24 -0400 Subject: [PATCH 2/6] loading models --- utils/load_and_analyse_model.ipynb | 98 +++++++++++++++++++++++++----- 1 file changed, 84 insertions(+), 14 deletions(-) diff --git a/utils/load_and_analyse_model.ipynb b/utils/load_and_analyse_model.ipynb index 1960c863a..0caf0aa8d 100644 --- a/utils/load_and_analyse_model.ipynb +++ b/utils/load_and_analyse_model.ipynb @@ -16,18 +16,18 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "f4f98bcaad2644f9b6f9b73193c59d35", + "model_id": "e39888dd017a44e3a04ea1004ae2991d", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "config.json: 0%| | 0.00/654 [00:00 Date: Mon, 22 Jul 2024 14:01:33 -0400 Subject: [PATCH 3/6] Safetensors loading --- utils/load_and_analyse_model.ipynb | 178 ++++++++++++----------------- 1 file changed, 73 insertions(+), 105 deletions(-) diff --git a/utils/load_and_analyse_model.ipynb b/utils/load_and_analyse_model.ipynb index 0caf0aa8d..eaa4da6ed 100644 --- a/utils/load_and_analyse_model.ipynb +++ b/utils/load_and_analyse_model.ipynb @@ -16,92 +16,36 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e39888dd017a44e3a04ea1004ae2991d", + "model_id": "988471c2e2aa4c89b73b7069b2d43f9d", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "README.md: 0%| | 0.00/36.5k [00:00 Date: Mon, 22 Jul 2024 16:59:09 -0400 Subject: [PATCH 4/6] Weight analysis and visualization for llama3 models loaded from safetensors --- utils/analyse_model_by_safetensors.py | 129 +++++++++++++++++++ utils/load_and_analyse_model.ipynb | 178 -------------------------- 2 files changed, 129 insertions(+), 178 deletions(-) create mode 100644 utils/analyse_model_by_safetensors.py delete mode 100644 utils/load_and_analyse_model.ipynb diff --git a/utils/analyse_model_by_safetensors.py b/utils/analyse_model_by_safetensors.py new file mode 100644 index 000000000..1eb6d905b --- /dev/null +++ b/utils/analyse_model_by_safetensors.py @@ -0,0 +1,129 @@ +import transformers +import torch +import torch.nn as nn +from transformers import AutoModelForCausalLM, AutoTokenizer +from llmcompressor.transformers import SparseAutoModelForCausalLM +from 
scipy.stats import skew, kurtosis
+import matplotlib.pyplot as plt
+import numpy as np
+
+import safetensors
+from safetensors import safe_open
+import os
+import json
+from tqdm import tqdm
+
+
+def get_stats_of_layer(tensors):
+    """Summary statistics (min/max/mean/median/std) for each tensor."""
+    stats_layer = {}
+    for linear_ in tqdm(tensors):
+        stats_layer[linear_] = {}
+        stats_layer[linear_]["min"] = torch.min(tensors[linear_]).item()
+        stats_layer[linear_]["max"] = torch.max(tensors[linear_]).item()
+        stats_layer[linear_]["mean"] = torch.mean(tensors[linear_]).item()
+        stats_layer[linear_]["median"] = torch.median(tensors[linear_]).item()
+        stats_layer[linear_]["std"] = torch.std(tensors[linear_]).item()
+        # kurtosis via scipy needs a flattened float16 numpy copy of every
+        # tensor, which is slow at 70B scale, so it is skipped for now:
+        # float16_tensor = tensors[linear_].to(torch.float16).cpu().numpy().flatten()
+        # stats_layer[linear_]["kurtosis"] = kurtosis(float16_tensor)
+
+    return stats_layer
+
+
+def store_histograms(tensors, layer, model_path, log=True):
+    """Histogram of every linear operator of one decoder layer (2x4 grid)."""
+    fig, axs = plt.subplots(2, 4, figsize=(20, 10))
+    fig.suptitle(f"Histogram of Linear Operators of Layer {layer}")
+    tensor_keys = sorted(list(tensors.keys()))
+    for i, linear_ in enumerate(tensor_keys):
+        tensor = tensors[linear_].to(torch.float16).cpu().numpy().flatten()
+        axs[i//4, i%4].hist(tensor, bins=100, log=log)
+        axs[i//4, i%4].set_title(linear_)
+
+    # Create the output directory on first use so savefig cannot fail.
+    os.makedirs(f"{model_path}/histograms", exist_ok=True)
+    plt.savefig(f"{model_path}/histograms/histogram_layer_{layer}.png", dpi=300)
+    plt.close()
+
+if __name__ == "__main__":
+
+    model_id = "meta-llama/Meta-Llama-3-70B"
+    weight_path = "/nm/drive0/shashata/weight-analysis/dense_llama_3_70B"
+    cache_dir = "/nm/drive0/shashata/weight-analysis"
+    presaved_path = f"{cache_dir}/models--{model_id.replace('/', '--')}"
+
+    if not os.path.exists(presaved_path):
+        # os.makedirs(presaved_path)
+        model = SparseAutoModelForCausalLM.from_pretrained(
+            model_id,
+            device_map='auto',
+            torch_dtype='auto',
+            cache_dir=cache_dir
+        )
+        model.save_pretrained(weight_path)
+
+
+
+    linear_operators = ['mlp.gate_proj', 'mlp.down_proj', 'mlp.up_proj', 'self_attn.k_proj', 'self_attn.v_proj', 'self_attn.q_proj', 'self_attn.o_proj']
+    layer_index_file = f"{weight_path}/model.safetensors.index.json"
+
+    # Load the shard index: it maps each tensor name to its shard file.
+    with open(layer_index_file, "r") as f:
+        layer_index = json.load(f)['weight_map']
+    layer_keys = list(layer_index.keys())
+
+    # find the max layer number
+    max_layer = max([int(x.split('.')[2]) for x in layer_keys if 'layers' in x])
+    print(max_layer)
+
+    min_layer = 0
+
+    stats = {}
+    for layer in range(min_layer, max_layer+1):
+        # if layer != 1:
+        #     continue
+
+        print(f"Layer {layer}")
+
+        # get the layer keys for layer_index
+        # layer_keys = [x for x in layer_index.keys() if f"layers.{layer}.self_attn" in x]
+        layer_files = []
+        layer_opearators = []
+        layer_tensors = {}
+
+        # Collect this layer's linear-operator tensor names ...
+        for op in linear_operators:
+            layer_opearators.extend(x for x in layer_keys if f"layers.{layer}.{op}" in x)
+
+        # ... and the deduplicated set of shard files that hold them.
+        for lo in layer_opearators:
+            if layer_index[lo] not in layer_files:
+                layer_files.append(layer_index[lo])
+        # print(list(layer_files))
+        print(layer_files)
+        print(layer_opearators)
+        if len(layer_files) == 1:
+            with safe_open(f"{weight_path}/{layer_files[0]}", framework='pt', device='cpu') as f:
+                for k in layer_opearators:
+                    layer_tensors[k] = f.get_tensor(k)
+        elif len(layer_files) > 1:
+            # A layer can straddle shards; read each shard only for the
+            # tensors it actually contains.
+            for lf in layer_files:
+                with safe_open(f"{weight_path}/{lf}", framework='pt', device='cpu') as f:
+                    for k in layer_opearators:
+                        if k in f.keys():
+                            layer_tensors[k] = f.get_tensor(k)
+
+
+        for k in layer_tensors.keys():
+            print(k, layer_tensors[k].shape)
+
+        stats.update(get_stats_of_layer(layer_tensors))
+        print(stats)
+        # print(stats[layer])
+
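+        # Dump this layer's weight histograms to disk (log-scaled y-axis
+        # keeps the near-zero bins from swamping the plot):
+        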
store_histograms(layer_tensors, layer, weight_path, log=True) + # break + + # if layer > 1: + # break + + # save the stats using json + with open(f"{weight_path}/model_stats.json", "w") as f: + json.dump(stats, f) diff --git a/utils/load_and_analyse_model.ipynb b/utils/load_and_analyse_model.ipynb deleted file mode 100644 index eaa4da6ed..000000000 --- a/utils/load_and_analyse_model.ipynb +++ /dev/null @@ -1,178 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Installation\n", - "```\n", - "git clone git@github.com:vllm-project/llm-compressor.git\\\n", - "cd llm-compressor\\\n", - "micromamba create -n weight-analyzer python=3.11\\\n", - "pip install -e .\n", - "\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "988471c2e2aa4c89b73b7069b2d43f9d", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Loading checkpoint shards: 0%| | 0/4 [00:00 Date: Mon, 22 Jul 2024 17:13:20 -0400 Subject: [PATCH 5/6] Cleaning up the code and skipping kurtosis --- utils/analyse_model_by_safetensors.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/utils/analyse_model_by_safetensors.py b/utils/analyse_model_by_safetensors.py index 1eb6d905b..a6fd579b4 100644 --- a/utils/analyse_model_by_safetensors.py +++ b/utils/analyse_model_by_safetensors.py @@ -6,7 +6,6 @@ from scipy.stats import skew, kurtosis import matplotlib.pyplot as plt import numpy as np - import safetensors from safetensors import safe_open import os @@ -61,8 +60,6 @@ def store_histograms(tensors, layer, model_path, log=True): ) model.save_pretrained(weight_path) - - linear_operators = ['mlp.gate_proj', 'mlp.down_proj', 'mlp.up_proj', 'self_attn.k_proj', 'self_attn.v_proj', 'self_attn.q_proj', 'self_attn.o_proj'] layer_index_file = f"{weight_path}/model.safetensors.index.json" @@ -73,19 +70,14 @@ def store_histograms(tensors, layer, model_path, log=True): # find the max layer number max_layer = max([int(x.split('.')[2]) for x in layer_keys if 'layers' in x]) - print(max_layer) + print("Total Layers ->", max_layer+1) min_layer = 0 stats = {} + print("Starting to work with layers") for layer in range(min_layer, max_layer+1): - # if layer != 1: - # continue - print(f"Layer {layer}") - - # get the layer keys for layer_index - # layer_keys = [x for x in layer_index.keys() if f"layers.{layer}.self_attn" in x] layer_files = [] layer_opearators = [] layer_tensors = {} @@ -96,7 +88,7 @@ def store_histograms(tensors, layer, model_path, log=True): for lo in layer_opearators: if layer_index[lo] not in layer_files: layer_files.append(layer_index[lo]) - # print(list(layer_files)) + print(layer_files) print(layer_opearators) if len(layer_files) == 1: @@ -114,16 +106,14 @@ def store_histograms(tensors, layer, model_path, log=True): for k in layer_tensors.keys(): print(k, layer_tensors[k].shape) - stats.update(get_stats_of_layer(layer_tensors)) - print(stats) + layer_stats = get_stats_of_layer(layer_tensors) + stats.update(layer_stats) + print(layer_stats) + # print(stats) # print(stats[layer]) store_histograms(layer_tensors, layer, weight_path, log=True) - # break - # if layer > 1: - # break - # save the stats using json with open(f"{weight_path}/model_stats.json", "w") as f: json.dump(stats, f) From 4e55a1dda19b3db72054fcab562a75f94b76da2b Mon Sep 17 00:00:00 2001 From: pythonLoader Date: Mon, 22 Jul 2024 17:16:56 -0400 
Subject: [PATCH 6/6] code clean up --- utils/analyse_model_by_safetensors.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/analyse_model_by_safetensors.py b/utils/analyse_model_by_safetensors.py index a6fd579b4..34aa86b7b 100644 --- a/utils/analyse_model_by_safetensors.py +++ b/utils/analyse_model_by_safetensors.py @@ -45,11 +45,11 @@ def store_histograms(tensors, layer, model_path, log=True): if __name__ == "__main__": - model_id = "meta-llama/Meta-Llama-3-70B" - weight_path = "/nm/drive0/shashata/weight-analysis/dense_llama_3_70B" + model_size = "8B" + model_id = f"meta-llama/Meta-Llama-3-{model_size}" + weight_path = f"/nm/drive0/shashata/weight-analysis/dense_llama_3_{model_size}" cache_dir = "/nm/drive0/shashata/weight-analysis" presaved_path = f"{cache_dir}/models--{model_id.replace('/', '--')}" - if not os.path.exists(presaved_path): # os.makedirs(presaved_path) model = SparseAutoModelForCausalLM.from_pretrained(
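A few usage sketches for the analysis script above. None of the following code is part of the patch series; paths and tensor names are placeholders. First, a spot check does not require running the script's full layer loop: a sharded safetensors checkpoint can be opened lazily, one tensor at a time, assuming the same on-disk layout the script uses (shards plus a model.safetensors.index.json weight map).

```python
import json
from safetensors import safe_open

weight_path = "/path/to/dense_llama_3_8B"  # placeholder

# The index maps each tensor name to the shard file that stores it.
with open(f"{weight_path}/model.safetensors.index.json") as f:
    weight_map = json.load(f)["weight_map"]

name = "model.layers.0.self_attn.q_proj.weight"  # example key
shard = weight_map[name]

# safe_open reads lazily, so only the requested tensor is materialized.
with safe_open(f"{weight_path}/{shard}", framework="pt", device="cpu") as f:
    tensor = f.get_tensor(name)

print(name, tuple(tensor.shape), tensor.dtype)
```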
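Patch 5 skips kurtosis because the scipy route forces a flattened float16 numpy copy of every weight tensor. If the statistic is wanted later, it can be computed without leaving torch; this is a sketch of the standard Fisher (excess) kurtosis definition, and torch_kurtosis is a hypothetical helper name, not code from the patches.

```python
import torch

def torch_kurtosis(t: torch.Tensor) -> float:
    """Fisher (excess) kurtosis, computed in float32 without a numpy copy."""
    x = t.to(torch.float32).flatten()
    mean = x.mean()
    # Population std (ddof=0), matching scipy.stats.kurtosis's default.
    std = x.std(unbiased=False)
    # Fourth standardized moment, minus 3 so a Gaussian scores 0.
    return (((x - mean) ** 4).mean() / std.pow(4)).item() - 3.0
```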
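Finally, the model_stats.json the script writes is a flat dict mapping each tensor name to its min/max/mean/median/std, so downstream analysis needs no torch at all. A small sketch (the path is again a placeholder) that ranks tensors by spread, which is often a useful first look before choosing a quantization scheme:

```python
import json

weight_path = "/path/to/dense_llama_3_8B"  # placeholder

with open(f"{weight_path}/model_stats.json") as f:
    stats = json.load(f)

# Rank tensors by standard deviation; unusually wide weight distributions
# are the usual suspects when low-bit quantization loses accuracy.
by_std = sorted(stats.items(), key=lambda kv: kv[1]["std"], reverse=True)

for name, s in by_std[:10]:
    print(f"{name}: std={s['std']:.4f} "
          f"range=[{s['min']:.4f}, {s['max']:.4f}] mean={s['mean']:.4f}")
```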