Hey, while running the 4-bit quantized model from https://huggingface.co/ThetaCursed/Ovis1.6-Gemma2-9B-bnb-4bit I am getting the following error:
{ "name": "RuntimeError", "message": "self and mat2 must have the same dtype, but got BFloat16 and Byte", "stack": "--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) Cell In[3], line 35 23 with torch.inference_mode(): 24 gen_kwargs = dict( 25 max_new_tokens=1024, 26 do_sample=False, (...) 33 use_cache=True 34 ) ---> 35 output_ids = model.generate(input_ids, pixel_values=pixel_values, attention_mask=attention_mask, **gen_kwargs)[0] 36 output = text_tokenizer.decode(output_ids, skip_special_tokens=True) 37 print(f'Output:\ {output}') File ~/.cache/huggingface/modules/transformers_modules/AIDC-AI/Ovis1.6-Gemma2-9B/15954d60650e5d6f3cfebcb9407e809b4c5019e1/modeling_ovis.py:588, in Ovis.generate(self, inputs, **kwargs) 583 def generate( 584 self, 585 inputs: Optional[torch.Tensor] = None, 586 **kwargs 587 ) -> Union[GenerateOutput, torch.LongTensor]: --> 588 _, inputs_embeds, labels, attention_mask = self.merge_multimodal( 589 text_input_ids=inputs, 590 text_attention_masks=kwargs.pop('attention_mask'), 591 text_labels=None, 592 pixel_values=kwargs.pop('pixel_values'), 593 left_padding=True 594 ) 595 if getattr(self.generation_config, 'cache_implementation') == 'hybrid': # mainly for Gemma2 596 kwargs['past_key_values'] = self._get_hybrid_cache_for_llm( 597 getattr(kwargs, \"num_beams\", inputs_embeds.shape[0]), kwargs['max_new_tokens'] + inputs_embeds.shape[-2]) File ~/.cache/huggingface/modules/transformers_modules/AIDC-AI/Ovis1.6-Gemma2-9B/15954d60650e5d6f3cfebcb9407e809b4c5019e1/modeling_ovis.py:385, in Ovis.merge_multimodal(self, text_input_ids, text_attention_masks, text_labels, pixel_values, left_padding) 383 num_images = [x.shape[0] if x is not None else 0 for x in pixel_values] 384 if sum(num_images) > 0: --> 385 visual_tokens = self.visual_tokenizer(torch.cat([x for x in pixel_values if x is not None], dim=0)) 386 visual_embeds = torch.split(self.get_vte()(visual_tokens).to(dtype=self.dtype, device=input_device), 387 split_size_or_sections=num_images, dim=0) 388 visual_input_ids = torch.split(torch.argmax(visual_tokens, dim=-1).to(device=input_device), 389 split_size_or_sections=num_images, dim=0) File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs) 1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] 1510 else: -> 1511 return self._call_impl(*args, **kwargs) File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs) 1515 # If we don't have any hooks, we want to skip the rest of the logic in 1516 # this function, and just call forward. 
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1518 or _global_backward_pre_hooks or _global_backward_hooks 1519 or _global_forward_hooks or _global_forward_pre_hooks): -> 1520 return forward_call(*args, **kwargs) 1522 try: 1523 result = None File ~/.local/lib/python3.8/site-packages/accelerate/hooks.py:170, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs) 168 output = module._old_forward(*args, **kwargs) 169 else: --> 170 output = module._old_forward(*args, **kwargs) 171 return module._hf_hook.post_forward(module, output) File ~/.cache/huggingface/modules/transformers_modules/AIDC-AI/Ovis1.6-Gemma2-9B/15954d60650e5d6f3cfebcb9407e809b4c5019e1/modeling_ovis.py:221, in BaseVisualTokenizer.forward(self, pixel_values) 220 def forward(self, pixel_values) -> torch.Tensor: # [BatchSize, ImageShape] -> [BatchSize, #Token, VocabSize] --> 221 features = self.encode(pixel_values) 222 logits = self.head(features) 223 tokens = self.tokenize(logits) File ~/.cache/huggingface/modules/transformers_modules/AIDC-AI/Ovis1.6-Gemma2-9B/15954d60650e5d6f3cfebcb9407e809b4c5019e1/modeling_ovis.py:196, in BaseVisualTokenizer.encode(self, pixel_values) 195 def encode(self, pixel_values): --> 196 output = self.backbone(pixel_values, output_hidden_states=True, return_dict=True) 197 features = output.hidden_states[-1] 198 if self.config.drop_cls_token: File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs) 1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] 1510 else: -> 1511 return self._call_impl(*args, **kwargs) File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs) 1515 # If we don't have any hooks, we want to skip the rest of the logic in 1516 # this function, and just call forward. 1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1518 or _global_backward_pre_hooks or _global_backward_hooks 1519 or _global_forward_hooks or _global_forward_pre_hooks): -> 1520 return forward_call(*args, **kwargs) 1522 try: 1523 result = None File ~/.local/lib/python3.8/site-packages/accelerate/hooks.py:170, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs) 168 output = module._old_forward(*args, **kwargs) 169 else: --> 170 output = module._old_forward(*args, **kwargs) 171 return module._hf_hook.post_forward(module, output) File ~/.local/lib/python3.8/site-packages/transformers/models/siglip/modeling_siglip.py:1189, in SiglipVisionModel.forward(self, pixel_values, output_attentions, output_hidden_states, return_dict, interpolate_pos_encoding) 1165 r\"\"\" 1166 Returns: 1167 (...) 
1185 >>> pooled_output = outputs.pooler_output # pooled features 1186 ```\"\"\" 1187 return_dict = return_dict if return_dict is not None else self.config.use_return_dict -> 1189 return self.vision_model( 1190 pixel_values=pixel_values, 1191 output_attentions=output_attentions, 1192 output_hidden_states=output_hidden_states, 1193 return_dict=return_dict, 1194 interpolate_pos_encoding=interpolate_pos_encoding, 1195 ) File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs) 1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] 1510 else: -> 1511 return self._call_impl(*args, **kwargs) File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs) 1515 # If we don't have any hooks, we want to skip the rest of the logic in 1516 # this function, and just call forward. 1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1518 or _global_backward_pre_hooks or _global_backward_hooks 1519 or _global_forward_hooks or _global_forward_pre_hooks): -> 1520 return forward_call(*args, **kwargs) 1522 try: 1523 result = None File ~/.local/lib/python3.8/site-packages/accelerate/hooks.py:170, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs) 168 output = module._old_forward(*args, **kwargs) 169 else: --> 170 output = module._old_forward(*args, **kwargs) 171 return module._hf_hook.post_forward(module, output) File ~/.local/lib/python3.8/site-packages/transformers/models/siglip/modeling_siglip.py:1100, in SiglipVisionTransformer.forward(self, pixel_values, output_attentions, output_hidden_states, return_dict, interpolate_pos_encoding) 1097 last_hidden_state = encoder_outputs[0] 1098 last_hidden_state = self.post_layernorm(last_hidden_state) -> 1100 pooler_output = self.head(last_hidden_state) if self.use_head else None 1101 if not return_dict: 1102 return (last_hidden_state, pooler_output) + encoder_outputs[1:] File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs) 1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] 1510 else: -> 1511 return self._call_impl(*args, **kwargs) File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs) 1515 # If we don't have any hooks, we want to skip the rest of the logic in 1516 # this function, and just call forward. 
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1518 or _global_backward_pre_hooks or _global_backward_hooks 1519 or _global_forward_hooks or _global_forward_pre_hooks): -> 1520 return forward_call(*args, **kwargs) 1522 try: 1523 result = None File ~/.local/lib/python3.8/site-packages/accelerate/hooks.py:170, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs) 168 output = module._old_forward(*args, **kwargs) 169 else: --> 170 output = module._old_forward(*args, **kwargs) 171 return module._hf_hook.post_forward(module, output) File ~/.local/lib/python3.8/site-packages/transformers/models/siglip/modeling_siglip.py:1127, in SiglipMultiheadAttentionPoolingHead.forward(self, hidden_state) 1124 batch_size = hidden_state.shape[0] 1125 probe = self.probe.repeat(batch_size, 1, 1) -> 1127 hidden_state = self.attention(probe, hidden_state, hidden_state)[0] 1129 residual = hidden_state 1130 hidden_state = self.layernorm(hidden_state) File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs) 1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] 1510 else: -> 1511 return self._call_impl(*args, **kwargs) File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs) 1515 # If we don't have any hooks, we want to skip the rest of the logic in 1516 # this function, and just call forward. 1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1518 or _global_backward_pre_hooks or _global_backward_hooks 1519 or _global_forward_hooks or _global_forward_pre_hooks): -> 1520 return forward_call(*args, **kwargs) 1522 try: 1523 result = None File ~/.local/lib/python3.8/site-packages/accelerate/hooks.py:170, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs) 168 output = module._old_forward(*args, **kwargs) 169 else: --> 170 output = module._old_forward(*args, **kwargs) 171 return module._hf_hook.post_forward(module, output) File ~/.local/lib/python3.8/site-packages/torch/nn/modules/activation.py:1241, in MultiheadAttention.forward(self, query, key, value, key_padding_mask, need_weights, attn_mask, average_attn_weights, is_causal) 1227 attn_output, attn_output_weights = F.multi_head_attention_forward( 1228 query, key, value, self.embed_dim, self.num_heads, 1229 self.in_proj_weight, self.in_proj_bias, (...) 
1238 average_attn_weights=average_attn_weights, 1239 is_causal=is_causal) 1240 else: -> 1241 attn_output, attn_output_weights = F.multi_head_attention_forward( 1242 query, key, value, self.embed_dim, self.num_heads, 1243 self.in_proj_weight, self.in_proj_bias, 1244 self.bias_k, self.bias_v, self.add_zero_attn, 1245 self.dropout, self.out_proj.weight, self.out_proj.bias, 1246 training=self.training, 1247 key_padding_mask=key_padding_mask, 1248 need_weights=need_weights, 1249 attn_mask=attn_mask, 1250 average_attn_weights=average_attn_weights, 1251 is_causal=is_causal) 1252 if self.batch_first and is_batched: 1253 return attn_output.transpose(1, 0), attn_output_weights File ~/.local/lib/python3.8/site-packages/torch/nn/functional.py:5449, in multi_head_attention_forward(query, key, value, embed_dim_to_check, num_heads, in_proj_weight, in_proj_bias, bias_k, bias_v, add_zero_attn, dropout_p, out_proj_weight, out_proj_bias, training, key_padding_mask, need_weights, attn_mask, use_separate_proj_weight, q_proj_weight, k_proj_weight, v_proj_weight, static_k, static_v, average_attn_weights, is_causal) 5446 attn_output = torch.bmm(attn_output_weights, v) 5448 attn_output = attn_output.transpose(0, 1).contiguous().view(tgt_len * bsz, embed_dim) -> 5449 attn_output = linear(attn_output, out_proj_weight, out_proj_bias) 5450 attn_output = attn_output.view(tgt_len, bsz, attn_output.size(1)) 5452 # optionally average attention weights over heads RuntimeError: self and mat2 must have the same dtype, but got BFloat16 and Byte" }
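For context on what the error means: `Byte` (uint8) is the packed storage dtype bitsandbytes uses for 4-bit weights. The SigLIP pooling head inside the visual tokenizer is a plain `nn.MultiheadAttention`, and `F.multi_head_attention_forward` passes `out_proj.weight` straight into `linear()` as a raw tensor, so the packed uint8 weight never goes through the bitsandbytes `Linear4bit` forward that would dequantize it, hence the BFloat16 vs Byte matmul. One possible workaround, shown below as an untested sketch (the repo ID and the skipped module names are my guesses based on this traceback, not an official fix), is to quantize the original checkpoint on the fly while excluding the visual tokenizer from quantization:

```python
# Untested sketch: load the original checkpoint with on-the-fly bnb 4-bit
# quantization, but keep the visual tokenizer (and visual embedding table)
# in bf16 so nn.MultiheadAttention's out_proj weight is never packed to uint8.
# "visual_tokenizer" / "vte" are module-name guesses taken from this traceback.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    # Despite the name, this skip list is also honored for 4-bit loading
    # in recent transformers releases.
    llm_int8_skip_modules=["visual_tokenizer", "vte"],
)

model = AutoModelForCausalLM.from_pretrained(
    "AIDC-AI/Ovis1.6-Gemma2-9B",
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)
```

The prepacked ThetaCursed bnb-4bit checkpoint may already have the vision weights quantized, in which case re-quantizing from the original repo as above (or using the GPTQ releases mentioned below) is probably the simpler route.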
I have encountered the same problem. Is there a solution now?
We've released quantized versions of Ovis1.6: Ovis1.6-Gemma2-9B-GPTQ-Int4 and Ovis1.6-Llama3.2-3B-GPTQ-Int4. Feel free to try them out and share your feedback!
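For anyone landing here, a minimal loading sketch for the GPTQ release, assuming it exposes the same `trust_remote_code` API as the original Ovis1.6 model card (the `get_text_tokenizer()` / `get_visual_tokenizer()` helpers come from that card; the repo ID, dtype, and device map below are assumptions, so check the GPTQ repo's README for the exact dependencies such as auto-gptq):

```python
# Sketch based on the original Ovis1.6 model card; the GPTQ repo ID and
# float16 dtype are assumptions, not verified against the released README.
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "AIDC-AI/Ovis1.6-Gemma2-9B-GPTQ-Int4",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    device_map="auto",
)
text_tokenizer = model.get_text_tokenizer()
visual_tokenizer = model.get_visual_tokenizer()
```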