From 21ce909bb608d101067dc3fcf2be15716e390d3e Mon Sep 17 00:00:00 2001 From: Josh Bell Date: Sat, 1 Feb 2025 15:50:12 -0400 Subject: [PATCH] Work with Mac M1/M2/M3 --- .gitignore | 1 + shap_e/diffusion/gaussian_diffusion.py | 9 +- .../examples/sample_text_to_3d_mac_m3.ipynb | 191 ++++++++++++++++++ 3 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 shap_e/examples/sample_text_to_3d_mac_m3.ipynb diff --git a/.gitignore b/.gitignore index e4c0592c3..f51c91b9b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ __pycache__/ .DS_Store *.egg-info/ +shap_e_model_cache/ \ No newline at end of file diff --git a/shap_e/diffusion/gaussian_diffusion.py b/shap_e/diffusion/gaussian_diffusion.py index 1f0e4a48e..285c98d89 100644 --- a/shap_e/diffusion/gaussian_diffusion.py +++ b/shap_e/diffusion/gaussian_diffusion.py @@ -10,6 +10,8 @@ import torch as th import yaml +is_mps = th.backends.mps.is_available() + def diffusion_from_config(config: Union[str, Dict[str, Any]]) -> "GaussianDiffusion": if isinstance(config, str): @@ -1065,7 +1067,12 @@ def _extract_into_tensor(arr, timesteps, broadcast_shape): dimension equal to the length of timesteps. :return: a tensor of shape [batch_size, 1, ...] where the shape has K dims. """ - res = th.from_numpy(arr).to(device=timesteps.device)[timesteps].float() + res = None + if is_mps: + res = th.from_numpy(arr.astype(np.float32)).to(device=timesteps.device)[timesteps].float() + else: + res = th.from_numpy(arr).to(device=timesteps.device)[timesteps].float() + while len(res.shape) < len(broadcast_shape): res = res[..., None] return res + th.zeros(broadcast_shape, device=timesteps.device) diff --git a/shap_e/examples/sample_text_to_3d_mac_m3.ipynb b/shap_e/examples/sample_text_to_3d_mac_m3.ipynb new file mode 100644 index 000000000..8424f69f4 --- /dev/null +++ b/shap_e/examples/sample_text_to_3d_mac_m3.ipynb @@ -0,0 +1,191 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "964ccced", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "\n", + "from shap_e.diffusion.sample import sample_latents\n", + "from shap_e.diffusion.gaussian_diffusion import diffusion_from_config\n", + "from shap_e.models.download import load_model, load_config\n", + "from shap_e.util.notebooks import create_pan_cameras, decode_latent_images, gif_widget" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "8eed3a76", + "metadata": {}, + "outputs": [], + "source": [ + "device = torch.device('mps' if torch.mps.is_available() else 'cpu')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2d922637", + "metadata": {}, + "outputs": [], + "source": [ + "xm = load_model('transmitter', device=device)\n", + "model = load_model('text300M', device=device)\n", + "diffusion = diffusion_from_config(load_config('diffusion'))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "53d329d0", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "415aa46d2b274bce8461a16165e02b09", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/64 [00:00 5\u001b[0m latents \u001b[38;5;241m=\u001b[39m \u001b[43msample_latents\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbatch_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43mdiffusion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdiffusion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mguidance_scale\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtensor\u001b[49m\u001b[43m(\u001b[49m\u001b[43mguidance_scale\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloat32\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Ensure float32\u001b[39;49;00m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mdict\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mtexts\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[43mprogress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[43m \u001b[49m\u001b[43mclip_denoised\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_fp16\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Disable FP16 (not supported on MPS)\u001b[39;49;00m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_karras\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[43mkarras_steps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m64\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 16\u001b[0m \u001b[43m \u001b[49m\u001b[43msigma_min\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtensor\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m1e-3\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloat32\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Force float32\u001b[39;49;00m\n\u001b[1;32m 17\u001b[0m \u001b[43m \u001b[49m\u001b[43msigma_max\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtensor\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m160\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloat32\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Force float32\u001b[39;49;00m\n\u001b[1;32m 18\u001b[0m \u001b[43m \u001b[49m\u001b[43ms_churn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtensor\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloat32\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Force float32\u001b[39;49;00m\n\u001b[1;32m 19\u001b[0m \u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/playground/text-to-3d/shap-e/shap_e/diffusion/sample.py:62\u001b[0m, in \u001b[0;36msample_latents\u001b[0;34m(batch_size, model, diffusion, model_kwargs, guidance_scale, clip_denoised, use_fp16, use_karras, karras_steps, sigma_min, sigma_max, s_churn, device, progress)\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mautocast(device_type\u001b[38;5;241m=\u001b[39mdevice\u001b[38;5;241m.\u001b[39mtype, enabled\u001b[38;5;241m=\u001b[39muse_fp16):\n\u001b[1;32m 61\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_karras:\n\u001b[0;32m---> 62\u001b[0m samples \u001b[38;5;241m=\u001b[39m \u001b[43mkarras_sample\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 63\u001b[0m \u001b[43m \u001b[49m\u001b[43mdiffusion\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdiffusion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 64\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 65\u001b[0m \u001b[43m \u001b[49m\u001b[43mshape\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample_shape\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 66\u001b[0m \u001b[43m \u001b[49m\u001b[43msteps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkarras_steps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 67\u001b[0m \u001b[43m \u001b[49m\u001b[43mclip_denoised\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mclip_denoised\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 68\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 69\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 70\u001b[0m \u001b[43m \u001b[49m\u001b[43msigma_min\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msigma_min\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 71\u001b[0m \u001b[43m \u001b[49m\u001b[43msigma_max\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msigma_max\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 72\u001b[0m \u001b[43m \u001b[49m\u001b[43ms_churn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43ms_churn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 73\u001b[0m \u001b[43m \u001b[49m\u001b[43mguidance_scale\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mguidance_scale\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 74\u001b[0m \u001b[43m \u001b[49m\u001b[43mprogress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprogress\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 75\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 76\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 77\u001b[0m internal_batch_size \u001b[38;5;241m=\u001b[39m batch_size\n", + "File \u001b[0;32m~/Documents/playground/text-to-3d/shap-e/shap_e/diffusion/k_diffusion.py:113\u001b[0m, in \u001b[0;36mkarras_sample\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mkarras_sample\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 112\u001b[0m last \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 113\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mkarras_sample_progressive\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[1;32m 114\u001b[0m \u001b[43m \u001b[49m\u001b[43mlast\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mx\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m last\n", + "File \u001b[0;32m~/Documents/playground/text-to-3d/shap-e/shap_e/diffusion/k_diffusion.py:181\u001b[0m, in \u001b[0;36mkarras_sample_progressive\u001b[0;34m(diffusion, model, shape, steps, clip_denoised, progress, model_kwargs, device, sigma_min, sigma_max, rho, sampler, s_churn, s_tmin, s_tmax, s_noise, guidance_scale)\u001b[0m\n\u001b[1;32m 178\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 179\u001b[0m guided_denoiser \u001b[38;5;241m=\u001b[39m denoiser\n\u001b[0;32m--> 181\u001b[0m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43msample_fn\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 182\u001b[0m \u001b[43m \u001b[49m\u001b[43mguided_denoiser\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 183\u001b[0m \u001b[43m \u001b[49m\u001b[43mx_T\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 184\u001b[0m \u001b[43m \u001b[49m\u001b[43msigmas\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 185\u001b[0m \u001b[43m \u001b[49m\u001b[43mprogress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprogress\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 186\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43msampler_args\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 187\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[1;32m 188\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mdiffusion\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mGaussianDiffusion\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[1;32m 189\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01myield\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mdiffusion\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43munscale_out_dict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.pyenv/versions/textTo3d/lib/python3.11/site-packages/torch/utils/_contextlib.py:57\u001b[0m, in \u001b[0;36m_wrap_generator..generator_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 55\u001b[0m \u001b[38;5;66;03m# Pass the last request to the generator and get its response\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m---> 57\u001b[0m response \u001b[38;5;241m=\u001b[39m gen\u001b[38;5;241m.\u001b[39msend(request)\n\u001b[1;32m 59\u001b[0m \u001b[38;5;66;03m# We let the exceptions raised above by the generator's `.throw` or\u001b[39;00m\n\u001b[1;32m 60\u001b[0m \u001b[38;5;66;03m# `.send` methods bubble up to our caller, except for StopIteration\u001b[39;00m\n\u001b[1;32m 61\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 62\u001b[0m \u001b[38;5;66;03m# The generator informed us that it is done: take whatever its\u001b[39;00m\n\u001b[1;32m 63\u001b[0m \u001b[38;5;66;03m# returned value (if any) was and indicate that we're done too\u001b[39;00m\n\u001b[1;32m 64\u001b[0m \u001b[38;5;66;03m# by returning it (see docs for python's return-statement).\u001b[39;00m\n", + "File \u001b[0;32m~/Documents/playground/text-to-3d/shap-e/shap_e/diffusion/k_diffusion.py:275\u001b[0m, in \u001b[0;36msample_heun\u001b[0;34m(denoiser, x, sigmas, progress, s_churn, s_tmin, s_tmax, s_noise)\u001b[0m\n\u001b[1;32m 272\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 273\u001b[0m \u001b[38;5;66;03m# Heun's method\u001b[39;00m\n\u001b[1;32m 274\u001b[0m x_2 \u001b[38;5;241m=\u001b[39m x \u001b[38;5;241m+\u001b[39m d \u001b[38;5;241m*\u001b[39m dt\n\u001b[0;32m--> 275\u001b[0m denoised_2 \u001b[38;5;241m=\u001b[39m \u001b[43mdenoiser\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx_2\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msigmas\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[43ms_in\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 276\u001b[0m d_2 \u001b[38;5;241m=\u001b[39m to_d(x_2, sigmas[i \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m], denoised_2)\n\u001b[1;32m 277\u001b[0m d_prime \u001b[38;5;241m=\u001b[39m (d \u001b[38;5;241m+\u001b[39m d_2) \u001b[38;5;241m/\u001b[39m \u001b[38;5;241m2\u001b[39m\n", + "File \u001b[0;32m~/Documents/playground/text-to-3d/shap-e/shap_e/diffusion/k_diffusion.py:173\u001b[0m, in \u001b[0;36mkarras_sample_progressive..guided_denoiser\u001b[0;34m(x_t, sigma)\u001b[0m\n\u001b[1;32m 171\u001b[0m x_t \u001b[38;5;241m=\u001b[39m th\u001b[38;5;241m.\u001b[39mcat([x_t, x_t], dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 172\u001b[0m sigma \u001b[38;5;241m=\u001b[39m th\u001b[38;5;241m.\u001b[39mcat([sigma, sigma], dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m)\n\u001b[0;32m--> 173\u001b[0m x_0 \u001b[38;5;241m=\u001b[39m \u001b[43mdenoiser\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx_t\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msigma\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 174\u001b[0m cond_x_0, uncond_x_0 \u001b[38;5;241m=\u001b[39m th\u001b[38;5;241m.\u001b[39msplit(x_0, \u001b[38;5;28mlen\u001b[39m(x_0) \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m \u001b[38;5;241m2\u001b[39m, dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 175\u001b[0m x_0 \u001b[38;5;241m=\u001b[39m uncond_x_0 \u001b[38;5;241m+\u001b[39m guidance_scale \u001b[38;5;241m*\u001b[39m (cond_x_0 \u001b[38;5;241m-\u001b[39m uncond_x_0)\n", + "File \u001b[0;32m~/Documents/playground/text-to-3d/shap-e/shap_e/diffusion/k_diffusion.py:160\u001b[0m, in \u001b[0;36mkarras_sample_progressive..denoiser\u001b[0;34m(x_t, sigma)\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mdenoiser\u001b[39m(x_t, sigma):\n\u001b[0;32m--> 160\u001b[0m _, denoised \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdenoise\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[43m \u001b[49m\u001b[43mx_t\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msigma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mclip_denoised\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mclip_denoised\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_kwargs\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m denoised\n", + "File \u001b[0;32m~/Documents/playground/text-to-3d/shap-e/shap_e/diffusion/k_diffusion.py:105\u001b[0m, in \u001b[0;36mGaussianToKarrasDenoiser.denoise\u001b[0;34m(self, x_t, sigmas, clip_denoised, model_kwargs)\u001b[0m\n\u001b[1;32m 99\u001b[0m t \u001b[38;5;241m=\u001b[39m th\u001b[38;5;241m.\u001b[39mtensor(\n\u001b[1;32m 100\u001b[0m [\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msigma_to_t(sigma) \u001b[38;5;28;01mfor\u001b[39;00m sigma \u001b[38;5;129;01min\u001b[39;00m sigmas\u001b[38;5;241m.\u001b[39mcpu()\u001b[38;5;241m.\u001b[39mnumpy()],\n\u001b[1;32m 101\u001b[0m dtype\u001b[38;5;241m=\u001b[39mth\u001b[38;5;241m.\u001b[39mlong,\n\u001b[1;32m 102\u001b[0m device\u001b[38;5;241m=\u001b[39msigmas\u001b[38;5;241m.\u001b[39mdevice,\n\u001b[1;32m 103\u001b[0m )\n\u001b[1;32m 104\u001b[0m c_in \u001b[38;5;241m=\u001b[39m append_dims(\u001b[38;5;241m1.0\u001b[39m \u001b[38;5;241m/\u001b[39m (sigmas\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m2\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m \u001b[38;5;241m0.5\u001b[39m, x_t\u001b[38;5;241m.\u001b[39mndim)\n\u001b[0;32m--> 105\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdiffusion\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mp_mean_variance\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 106\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mx_t\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mc_in\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mclip_denoised\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mclip_denoised\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_kwargs\u001b[49m\n\u001b[1;32m 107\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 108\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m, out[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpred_xstart\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n", + "File \u001b[0;32m~/Documents/playground/text-to-3d/shap-e/shap_e/diffusion/gaussian_diffusion.py:333\u001b[0m, in \u001b[0;36mGaussianDiffusion.p_mean_variance\u001b[0;34m(self, model, x, t, clip_denoised, denoised_fn, model_kwargs)\u001b[0m\n\u001b[1;32m 331\u001b[0m B, C \u001b[38;5;241m=\u001b[39m x\u001b[38;5;241m.\u001b[39mshape[:\u001b[38;5;241m2\u001b[39m]\n\u001b[1;32m 332\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m t\u001b[38;5;241m.\u001b[39mshape \u001b[38;5;241m==\u001b[39m (B,)\n\u001b[0;32m--> 333\u001b[0m model_output \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 334\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(model_output, \u001b[38;5;28mtuple\u001b[39m):\n\u001b[1;32m 335\u001b[0m model_output, extra \u001b[38;5;241m=\u001b[39m model_output\n", + "File \u001b[0;32m~/.pyenv/versions/textTo3d/lib/python3.11/site-packages/torch/nn/modules/module.py:1739\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1737\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1738\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1739\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.pyenv/versions/textTo3d/lib/python3.11/site-packages/torch/nn/modules/module.py:1750\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1747\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1748\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1749\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1750\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1752\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1753\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n", + "File \u001b[0;32m~/Documents/playground/text-to-3d/shap-e/shap_e/models/generation/latent_diffusion.py:21\u001b[0m, in \u001b[0;36mSplitVectorDiffusion.forward\u001b[0;34m(self, x, t, **kwargs)\u001b[0m\n\u001b[1;32m 19\u001b[0m h \u001b[38;5;241m=\u001b[39m x\u001b[38;5;241m.\u001b[39mreshape(x\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m], \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_ctx, \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\u001b[38;5;241m.\u001b[39mpermute(\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m2\u001b[39m, \u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 20\u001b[0m pre_channels \u001b[38;5;241m=\u001b[39m h\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m1\u001b[39m]\n\u001b[0;32m---> 21\u001b[0m h \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrapped\u001b[49m\u001b[43m(\u001b[49m\u001b[43mh\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m (\n\u001b[1;32m 23\u001b[0m h\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m==\u001b[39m pre_channels \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m2\u001b[39m\n\u001b[1;32m 24\u001b[0m ), \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexpected twice as many outputs for variance prediction\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 25\u001b[0m eps, var \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mchunk(h, \u001b[38;5;241m2\u001b[39m, dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n", + "File \u001b[0;32m~/.pyenv/versions/textTo3d/lib/python3.11/site-packages/torch/nn/modules/module.py:1739\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1737\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1738\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1739\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.pyenv/versions/textTo3d/lib/python3.11/site-packages/torch/nn/modules/module.py:1750\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1747\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1748\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1749\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1750\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1752\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1753\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n", + "File \u001b[0;32m~/Documents/playground/text-to-3d/shap-e/shap_e/models/generation/transformer.py:298\u001b[0m, in \u001b[0;36mCLIPImagePointDiffusionTransformer.forward\u001b[0;34m(self, x, t, images, texts, embeddings)\u001b[0m\n\u001b[1;32m 295\u001b[0m clip_embed \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclip_embed(clip_out)\n\u001b[1;32m 297\u001b[0m cond \u001b[38;5;241m=\u001b[39m [(clip_embed, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtoken_cond), (t_embed, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtime_token_cond)]\n\u001b[0;32m--> 298\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_forward_with_cond\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcond\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/playground/text-to-3d/shap-e/shap_e/models/generation/transformer.py:231\u001b[0m, in \u001b[0;36mPointDiffusionTransformer._forward_with_cond\u001b[0;34m(self, x, cond_as_token)\u001b[0m\n\u001b[1;32m 228\u001b[0m h \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mcat(extra_tokens \u001b[38;5;241m+\u001b[39m [h], dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 230\u001b[0m h \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mln_pre(h)\n\u001b[0;32m--> 231\u001b[0m h \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackbone\u001b[49m\u001b[43m(\u001b[49m\u001b[43mh\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 232\u001b[0m h \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mln_post(h)\n\u001b[1;32m 233\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(extra_tokens):\n", + "File \u001b[0;32m~/.pyenv/versions/textTo3d/lib/python3.11/site-packages/torch/nn/modules/module.py:1739\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1737\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1738\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1739\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.pyenv/versions/textTo3d/lib/python3.11/site-packages/torch/nn/modules/module.py:1750\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1747\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1748\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1749\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1750\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1752\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1753\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n", + "File \u001b[0;32m~/Documents/playground/text-to-3d/shap-e/shap_e/models/generation/transformer.py:147\u001b[0m, in \u001b[0;36mTransformer.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, x: torch\u001b[38;5;241m.\u001b[39mTensor):\n\u001b[1;32m 146\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m block \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresblocks:\n\u001b[0;32m--> 147\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[43mblock\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 148\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m x\n", + "File \u001b[0;32m~/.pyenv/versions/textTo3d/lib/python3.11/site-packages/torch/nn/modules/module.py:1739\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1737\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1738\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1739\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.pyenv/versions/textTo3d/lib/python3.11/site-packages/torch/nn/modules/module.py:1750\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1747\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1748\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1749\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1750\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1752\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1753\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n", + "File \u001b[0;32m~/Documents/playground/text-to-3d/shap-e/shap_e/models/generation/transformer.py:109\u001b[0m, in \u001b[0;36mResidualAttentionBlock.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 108\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, x: torch\u001b[38;5;241m.\u001b[39mTensor):\n\u001b[0;32m--> 109\u001b[0m x \u001b[38;5;241m=\u001b[39m x \u001b[38;5;241m+\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mln_1\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 110\u001b[0m x \u001b[38;5;241m=\u001b[39m x \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmlp(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mln_2(x))\n\u001b[1;32m 111\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m x\n", + "File \u001b[0;32m~/.pyenv/versions/textTo3d/lib/python3.11/site-packages/torch/nn/modules/module.py:1739\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1737\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1738\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1739\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.pyenv/versions/textTo3d/lib/python3.11/site-packages/torch/nn/modules/module.py:1750\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1747\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1748\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1749\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1750\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1752\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1753\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n", + "File \u001b[0;32m~/Documents/playground/text-to-3d/shap-e/shap_e/models/generation/transformer.py:42\u001b[0m, in \u001b[0;36mMultiheadAttention.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, x):\n\u001b[1;32m 41\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mc_qkv(x)\n\u001b[0;32m---> 42\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[43mcheckpoint\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattention\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 43\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mc_proj(x)\n\u001b[1;32m 44\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m x\n", + "File \u001b[0;32m~/Documents/playground/text-to-3d/shap-e/shap_e/models/nn/checkpoint.py:24\u001b[0m, in \u001b[0;36mcheckpoint\u001b[0;34m(func, inputs, params, flag)\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m flag:\n\u001b[1;32m 23\u001b[0m args \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mtuple\u001b[39m(inputs) \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mtuple\u001b[39m(params)\n\u001b[0;32m---> 24\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mCheckpointFunction\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 26\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39minputs)\n", + "File \u001b[0;32m~/.pyenv/versions/textTo3d/lib/python3.11/site-packages/torch/autograd/function.py:575\u001b[0m, in \u001b[0;36mFunction.apply\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m 572\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_C\u001b[38;5;241m.\u001b[39m_are_functorch_transforms_active():\n\u001b[1;32m 573\u001b[0m \u001b[38;5;66;03m# See NOTE: [functorch vjp and autograd interaction]\u001b[39;00m\n\u001b[1;32m 574\u001b[0m args \u001b[38;5;241m=\u001b[39m _functorch\u001b[38;5;241m.\u001b[39mutils\u001b[38;5;241m.\u001b[39munwrap_dead_wrappers(args)\n\u001b[0;32m--> 575\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 577\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_setup_ctx_defined:\n\u001b[1;32m 578\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 579\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIn order to use an autograd.Function with functorch transforms \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 580\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(vmap, grad, jvp, jacrev, ...), it must override the setup_context \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 581\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstaticmethod. For more details, please see \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 582\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhttps://pytorch.org/docs/main/notes/extending.func.html\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 583\u001b[0m )\n", + "File \u001b[0;32m~/.pyenv/versions/textTo3d/lib/python3.11/site-packages/torch/amp/autocast_mode.py:503\u001b[0m, in \u001b[0;36mcustom_fwd..decorate_fwd\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 501\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m cast_inputs \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 502\u001b[0m args[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39m_fwd_used_autocast \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mis_autocast_enabled(device_type)\n\u001b[0;32m--> 503\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfwd\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 504\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 505\u001b[0m autocast_context \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mis_autocast_enabled(device_type)\n", + "File \u001b[0;32m~/Documents/playground/text-to-3d/shap-e/shap_e/models/nn/checkpoint.py:39\u001b[0m, in \u001b[0;36mCheckpointFunction.forward\u001b[0;34m(ctx, run_function, length, *args)\u001b[0m\n\u001b[1;32m 37\u001b[0m ctx\u001b[38;5;241m.\u001b[39msave_for_backward(\u001b[38;5;241m*\u001b[39minput_tensors, \u001b[38;5;241m*\u001b[39minput_params)\n\u001b[1;32m 38\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mno_grad():\n\u001b[0;32m---> 39\u001b[0m output_tensors \u001b[38;5;241m=\u001b[39m \u001b[43mctx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_function\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minput_tensors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m output_tensors\n", + "File \u001b[0;32m~/.pyenv/versions/textTo3d/lib/python3.11/site-packages/torch/nn/modules/module.py:1739\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1737\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1738\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1739\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.pyenv/versions/textTo3d/lib/python3.11/site-packages/torch/nn/modules/module.py:1750\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1747\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1748\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1749\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1750\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1752\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1753\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n", + "File \u001b[0;32m~/Documents/playground/text-to-3d/shap-e/shap_e/models/generation/transformer.py:75\u001b[0m, in \u001b[0;36mQKVMultiheadAttention.forward\u001b[0;34m(self, qkv)\u001b[0m\n\u001b[1;32m 73\u001b[0m qkv \u001b[38;5;241m=\u001b[39m qkv\u001b[38;5;241m.\u001b[39mview(bs, n_ctx, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mheads, \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 74\u001b[0m q, k, v \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39msplit(qkv, attn_ch, dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m---> 75\u001b[0m weight \u001b[38;5;241m=\u001b[39m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43meinsum\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 76\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mbthc,bshc->bhts\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mq\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mscale\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mscale\u001b[49m\n\u001b[1;32m 77\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# More stable with f16 than dividing afterwards\u001b[39;00m\n\u001b[1;32m 78\u001b[0m wdtype \u001b[38;5;241m=\u001b[39m weight\u001b[38;5;241m.\u001b[39mdtype\n\u001b[1;32m 79\u001b[0m weight \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39msoftmax(weight\u001b[38;5;241m.\u001b[39mfloat(), dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\u001b[38;5;241m.\u001b[39mtype(wdtype)\n", + "File \u001b[0;32m~/.pyenv/versions/textTo3d/lib/python3.11/site-packages/torch/functional.py:210\u001b[0m, in \u001b[0;36meinsum\u001b[0;34m(*args)\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[38;5;66;03m# Overwriting reason:\u001b[39;00m\n\u001b[1;32m 204\u001b[0m \u001b[38;5;66;03m# This dispatches to two ATen functions depending on the type of\u001b[39;00m\n\u001b[1;32m 205\u001b[0m \u001b[38;5;66;03m# split_size_or_sections. The branching code is in _tensor.py, which we\u001b[39;00m\n\u001b[1;32m 206\u001b[0m \u001b[38;5;66;03m# call here.\u001b[39;00m\n\u001b[1;32m 207\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m tensor\u001b[38;5;241m.\u001b[39msplit(split_size_or_sections, dim)\n\u001b[0;32m--> 210\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21meinsum\u001b[39m(\u001b[38;5;241m*\u001b[39margs: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Tensor:\n\u001b[1;32m 211\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"einsum(equation, *operands) -> Tensor\u001b[39;00m\n\u001b[1;32m 212\u001b[0m \n\u001b[1;32m 213\u001b[0m \u001b[38;5;124;03m Sums the product of the elements of the input :attr:`operands` along dimensions specified using a notation\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 351\u001b[0m \u001b[38;5;124;03m [ 0.3311, 5.5201, -3.0356]])\u001b[39;00m\n\u001b[1;32m 352\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 353\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mtorch\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbackends\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mopt_einsum\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mopt_einsum\u001b[39;00m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "batch_size = 4\n", + "guidance_scale = 15.0\n", + "prompt = \"a shark\"\n", + "\n", + "latents = sample_latents(\n", + " batch_size=batch_size,\n", + " model=model,\n", + " diffusion=diffusion,\n", + " guidance_scale=torch.tensor(guidance_scale, dtype=torch.float32), # Ensure float32\n", + " model_kwargs=dict(texts=[prompt] * batch_size),\n", + " progress=True,\n", + " clip_denoised=True,\n", + " use_fp16=False, # Disable FP16 (not supported on MPS)\n", + " use_karras=True,\n", + " karras_steps=64,\n", + " sigma_min=torch.tensor(1e-3, dtype=torch.float32), # Force float32\n", + " sigma_max=torch.tensor(160, dtype=torch.float32), # Force float32\n", + " s_churn=torch.tensor(0, dtype=torch.float32) # Force float32\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "633da2ec", + "metadata": {}, + "outputs": [], + "source": [ + "render_mode = 'nerf' # you can change this to 'stf'\n", + "size = 64 # this is the size of the renders; higher values take longer to render.\n", + "\n", + "cameras = create_pan_cameras(size, device)\n", + "for i, latent in enumerate(latents):\n", + " images = decode_latent_images(xm, latent, cameras, rendering_mode=render_mode)\n", + " display(gif_widget(images))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85a4dce4", + "metadata": {}, + "outputs": [], + "source": [ + "# Example of saving the latents as meshes.\n", + "from shap_e.util.notebooks import decode_latent_mesh\n", + "\n", + "for i, latent in enumerate(latents):\n", + " t = decode_latent_mesh(xm, latent).tri_mesh()\n", + " with open(f'example_mesh_{i}.ply', 'wb') as f:\n", + " t.write_ply(f)\n", + " with open(f'example_mesh_{i}.obj', 'w') as f:\n", + " t.write_obj(f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e6acecd", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}