Skip to content

Commit

Permalink
Fix pkv dtype (huggingface#481)
Browse files Browse the repository at this point in the history
* pkv dtype

* fix dtype
  • Loading branch information
jiqing-feng authored Dec 6, 2023
1 parent 8e1ad36 commit 3da80f6
Showing 1 changed file with 13 additions and 1 deletion.
14 changes: 13 additions & 1 deletion optimum/intel/generation/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,24 @@
logger = logging.getLogger(__name__)


def get_float_type(model_dtype: torch.dtype) -> str:
    """Return the short string tag for a model's floating-point dtype.

    Args:
        model_dtype: The torch dtype to translate.

    Returns:
        ``"bf16"`` for ``torch.bfloat16``, ``"fp16"`` for ``torch.float16``,
        and ``"fp32"`` for every other dtype (the fallback).
    """
    if model_dtype == torch.bfloat16:
        return "bf16"
    if model_dtype == torch.float16:
        return "fp16"
    # Anything that is not a half-precision dtype is reported as fp32.
    return "fp32"


def prepare_jit_inputs(model: PreTrainedModel, task: str, use_cache: bool = False):
task = _TASK_ALIASES.get(task, task)
signature = inspect.signature(model.forward) if hasattr(model, "forward") else inspect.signature(model.__call__)
onnx_config_class = TasksManager.get_exporter_config_constructor(model=model, exporter="onnx", task=task)
float_dtype = get_float_type(model.dtype)
if "text-generation" in task:
onnx_config = onnx_config_class(model.config, use_past=use_cache, use_past_in_inputs=use_cache)
onnx_config = onnx_config_class(
model.config, use_past=use_cache, use_past_in_inputs=use_cache, float_dtype=float_dtype
)
else:
onnx_config = onnx_config_class(model.config)

Expand Down

0 comments on commit 3da80f6

Please sign in to comment.