System Info
🤗 Diffusers version: 0.33.0.dev0
Platform: Windows-10-10.0.26100-SP0
Running on Google Colab?: No
Python version: 3.10.11
PyTorch version (GPU?): 2.5.1+cu124 (True)
Flax version (CPU?/GPU?/TPU?): not installed (NA)
Jax version: not installed
JaxLib version: not installed
Huggingface_hub version: 0.28.1
Transformers version: 4.46.3
Accelerate version: 1.4.0.dev0
PEFT version: not installed
Bitsandbytes version: 0.45.3.dev0
Safetensors version: 0.5.2
xFormers version: 0.0.29.post1
Accelerator: NVIDIA GeForce RTX 4060 Laptop GPU, 8188 MiB
Using GPU in script?: Yes (CUDA, see traceback)
Using distributed or parallel set-up in script?: No
Reproduction
I quantized the transformer and text encoder to int4 (bitsandbytes) and saved them; everything else stays the same. The script below tries to run inference with the pre-quantized checkpoint.
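For context, the two components were quantized and saved roughly as follows. This is a minimal sketch, not the exact commands used: the base repo id hunyuanvideo-community/HunyuanVideo and the output paths are assumptions.

import torch
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
from diffusers import HunyuanVideoTransformer3DModel
from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
from transformers import LlamaModel

base_id = "hunyuanvideo-community/HunyuanVideo"  # assumed base checkpoint

# 4-bit quantize the LLaMA text encoder via transformers' bnb config.
text_encoder = LlamaModel.from_pretrained(
    base_id,
    subfolder="text_encoder",
    quantization_config=TransformersBitsAndBytesConfig(
        load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16
    ),
    torch_dtype=torch.bfloat16,
)
text_encoder.save_pretrained("HunyuanVideo-int4/text_encoder")

# 4-bit quantize the video transformer via diffusers' bnb config.
transformer = HunyuanVideoTransformer3DModel.from_pretrained(
    base_id,
    subfolder="transformer",
    quantization_config=DiffusersBitsAndBytesConfig(
        load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16
    ),
    torch_dtype=torch.bfloat16,
)
transformer.save_pretrained("HunyuanVideo-int4/transformer")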
import torch
from diffusers import HunyuanVideoPipeline
from diffusers.utils import export_to_video

model_id = "newgenai79/HunyuanVideo-int4"

# Load the pre-quantized pipeline; torch_dtype applies to the non-quantized components.
pipe = HunyuanVideoPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
)
pipe.enable_sequential_cpu_offload()
pipe.vae.enable_tiling()

prompt = "A cat wearing sunglasses and working as a lifeguard at a pool."
generator = torch.Generator().manual_seed(181201)

output = pipe(
    prompt,
    width=512,
    height=320,
    num_frames=17,
    num_inference_steps=30,
    generator=generator,
).frames[0]  # first (and only) video in the batch
print("----Inference complete..")
export_to_video(output, "hunyuan_test.mp4", fps=8)
Running the script fails with the following error:
(venv) C:\aitest\SkyReels-V1>python hunyuan_bug.py
Loading pipeline components...: 0%| | 0/7 [00:00<?, ?it/s]Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Loading pipeline components...: 100%|███████████████████████████████| 7/7 [00:31<00:00, 4.49s/it]
Traceback (most recent call last):
File "C:\aitest\SkyReels-V1\hunyuan_bug.py", line 29, in <module>
output = pipe(
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\diffusers\pipelines\hunyuan_video\pipeline_hunyuan_video.py", line 598, in __call__
prompt_embeds, pooled_prompt_embeds, prompt_attention_mask = self.encode_prompt(
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\diffusers\pipelines\hunyuan_video\pipeline_hunyuan_video.py", line 318, in encode_prompt
prompt_embeds, prompt_attention_mask = self._get_llama_prompt_embeds(
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\diffusers\pipelines\hunyuan_video\pipeline_hunyuan_video.py", line 245, in _get_llama_prompt_embeds
prompt_embeds = self.text_encoder(
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\torch\nn\modules\module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\torch\nn\modules\module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\accelerate\hooks.py", line 176, in new_forward
output = module._old_forward(*args, **kwargs)
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\transformers\models\llama\modeling_llama.py", line 945, in forward
layer_outputs = decoder_layer(
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\torch\nn\modules\module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\torch\nn\modules\module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\transformers\models\llama\modeling_llama.py", line 676, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\torch\nn\modules\module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\torch\nn\modules\module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\transformers\models\llama\modeling_llama.py", line 559, in forward
query_states = self.q_proj(hidden_states)
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\torch\nn\modules\module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\torch\nn\modules\module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\accelerate\hooks.py", line 171, in new_forward
args, kwargs = module._hf_hook.pre_forward(module, *args, **kwargs)
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\accelerate\hooks.py", line 361, in pre_forward
set_module_tensor_to_device(
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\accelerate\utils\modeling.py", line 350, in set_module_tensor_to_device
new_value = param_cls(new_value, requires_grad=old_value.requires_grad, **kwargs).to(device)
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\bitsandbytes\nn\modules.py", line 331, in to
return self._quantize(device)
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\bitsandbytes\nn\modules.py", line 296, in _quantize
w_4bit, quant_state = bnb.functional.quantize_4bit(
File "C:\aitest\SkyReels-V1\venv\lib\site-packages\bitsandbytes\functional.py", line 1243, in quantize_4bit
raise ValueError(f"Blockwise quantization only supports 16/32-bit floats, but got {A.dtype}")
ValueError: Blockwise quantization only supports 16/32-bit floats, but got torch.uint8
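The failing path can be read straight off the traceback: enable_sequential_cpu_offload() installs accelerate hooks that rebuild each parameter on the fly via set_module_tensor_to_device(). That constructs a fresh bitsandbytes Params4bit around the stored uint8 buffer and calls .to(device); since the fresh parameter carries no quant_state, bitsandbytes treats it as unquantized and re-enters quantize_4bit(), which rejects the uint8 input. A standalone sketch of the same failure, assuming the Params4bit API of bitsandbytes 0.45:

import torch
import bitsandbytes as bnb

# Simulate a parameter whose .data is already 4-bit packed uint8,
# as stored in the saved int4 checkpoint.
packed = torch.empty(4096, 1, dtype=torch.uint8)

# Rebuilding Params4bit without its quant_state (what the accelerate hook
# effectively does) and moving it to CUDA re-triggers quantization:
param = bnb.nn.Params4bit(packed, requires_grad=False)
param.to("cuda")
# ValueError: Blockwise quantization only supports 16/32-bit floats, but got torch.uint8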
Expected behavior
The script should run to completion and export hunyuan_test.mp4. Loading a pre-quantized int4 checkpoint should behave the same as quantizing the components at load time; sequential CPU offload should not try to re-quantize weights that are already stored as packed 4-bit uint8.
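As a possible workaround (untested with this checkpoint, so treat it as an assumption): skip the per-parameter sequential offload and use component-level offload instead, so the already-quantized weights never pass through accelerate's set_module_tensor_to_device():

pipe = HunyuanVideoPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
pipe.enable_model_cpu_offload()  # offloads whole sub-models instead of single tensors
pipe.vae.enable_tiling()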