diff --git a/lm-hackers.ipynb b/lm-hackers.ipynb
index 12795c1..70b0c71 100644
--- a/lm-hackers.ipynb
+++ b/lm-hackers.ipynb
@@ -1256,6 +1256,33 @@
     "tokr.batch_decode(res)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6e769bff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Function to clean up memory before using a new model\n",
+    "import gc\n",
+    "\n",
+    "# Convenience function\n",
+    "def get_cuda_memory_reserved_gb():\n",
+    "    return torch.cuda.memory_reserved()/1_000_000_000\n",
+    "\n",
+    "def free_memory(verbose=False):\n",
+    "    gc.collect()  # reclaim unreferenced Python objects first\n",
+    "    torch.cuda.empty_cache()  # then release cached CUDA blocks back to the driver\n",
+    "    if verbose:\n",
+    "        print(f'ℹ️ CUDA MEMORY RESERVED AFTER free_memory: {get_cuda_memory_reserved_gb()} GB')\n",
+    "\n",
+    "# Let's see how it works:\n",
+    "print(f'BEFORE free_memory: {get_cuda_memory_reserved_gb()} GB')\n",
+    "del model  # Drop the last reference so the weights can be collected\n",
+    "print(f'BEFORE free_memory, after deleting `model`: {get_cuda_memory_reserved_gb()} GB')\n",
+    "free_memory(verbose=True)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 28,
@@ -1321,7 +1348,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "model = AutoModelForCausalLM.from_pretrained('TheBloke/Llama-2-7b-Chat-GPTQ', device_map=0, torch_dtype=torch.float16)"
+    "del model; free_memory(True)  # Clean up before loading the new model\n",
+    "model = AutoModelForCausalLM.from_pretrained('TheBloke/Llama-2-7b-Chat-GPTQ', device_map=0, torch_dtype=torch.float16)\n",
+    "print(f'CUDA MEMORY RESERVED AFTER LOADING model: {get_cuda_memory_reserved_gb()} GB')"
    ]
   },
   {
@@ -1364,6 +1393,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "del model; free_memory()\n",
     "mn = 'TheBloke/Llama-2-13B-GPTQ'\n",
     "model = AutoModelForCausalLM.from_pretrained(mn, device_map=0, torch_dtype=torch.float16)"
    ]
@@ -1465,6 +1495,7 @@
     }
    ],
    "source": [
+    "del model; free_memory()\n",
     "mn = \"stabilityai/StableBeluga-7B\"\n",
     "model = AutoModelForCausalLM.from_pretrained(mn, device_map=0, torch_dtype=torch.bfloat16)"
    ]
@@ -1535,6 +1566,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "del model; free_memory()\n",
     "mn = 'TheBloke/OpenOrca-Platypus2-13B-GPTQ'\n",
     "model = AutoModelForCausalLM.from_pretrained(mn, device_map=0, torch_dtype=torch.float16)"
    ]
@@ -2069,6 +2101,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "del model; free_memory()\n",
     "model = AutoModelForCausalLM.from_pretrained('meta-llama/Llama-2-7b-hf',\n",
     "                                             torch_dtype=torch.bfloat16, device_map=0)\n",
     "model = PeftModel.from_pretrained(model, ax_model)\n",
@@ -2119,6 +2152,17 @@
     "print(tokr.batch_decode(res)[0])"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b17c43c0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Finally, release the model and its memory\n",
+    "del model; free_memory(True)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "991d4a93-dab8-4777-82d2-301c494deba0",
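
A note on why every cell runs `del model` before `free_memory()`: `gc.collect()` can only reclaim the weights once the last Python reference to the model is gone, and `torch.cuda.empty_cache()` then hands the freed allocator blocks back to the driver, which is the point at which `torch.cuda.memory_reserved()` actually drops. The sketch below, not part of the notebook, illustrates the distinction between `torch.cuda.memory_allocated()` (bytes in live tensors) and `torch.cuda.memory_reserved()` (bytes held by PyTorch's caching allocator); it assumes a CUDA device, and the ~1 GB tensor size is arbitrary.

import gc
import torch

def report(label):
    # allocated: bytes in live tensors; reserved: bytes held by the caching allocator
    print(f'{label}: allocated={torch.cuda.memory_allocated()/1e9:.2f} GB, '
          f'reserved={torch.cuda.memory_reserved()/1e9:.2f} GB')

x = torch.empty(1_000_000_000, dtype=torch.uint8, device='cuda')  # ~1 GB tensor
report('tensor alive')             # both numbers include the tensor
del x; gc.collect()                # drop the last reference and collect it
report('after del + gc.collect')   # allocated drops; reserved stays cached
torch.cuda.empty_cache()           # return the cached blocks to the driver
report('after empty_cache')        # reserved drops too

This is the same two-step pattern `free_memory()` wraps, and it shows why deleting the model alone is not enough to lower the reserved figure the notebook prints.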