From be8fc0f0eb9083f39a972b3b88d52361a28b354c Mon Sep 17 00:00:00 2001
From: AllentDan
Date: Tue, 14 Jan 2025 16:33:07 +0800
Subject: [PATCH] use cuda() for internlm3 gptq

---
 lmdeploy/lite/apis/gptq.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lmdeploy/lite/apis/gptq.py b/lmdeploy/lite/apis/gptq.py
index 0e67913b4..658be4c36 100644
--- a/lmdeploy/lite/apis/gptq.py
+++ b/lmdeploy/lite/apis/gptq.py
@@ -98,7 +98,7 @@ def auto_gptq(model: str,
                                                 quantize_config,
                                                 revision=revision,
                                                 torch_dtype=torch_dtype,
-                                                trust_remote_code=True)
+                                                trust_remote_code=True).cuda()
 
     # quantize model, the examples should be list of dict whose keys
     # can only be "input_ids" and "attention_mask"
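
For context, below is a minimal sketch of how the patched call is typically used with AutoGPTQ. The model path, quantize config values, calibration text, and the quantize/save steps are illustrative assumptions based on AutoGPTQ's documented API, not taken verbatim from lmdeploy/lite/apis/gptq.py; only the trailing .cuda() reflects this patch.

    import torch
    from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
    from transformers import AutoTokenizer

    # Hypothetical paths and config, for illustration only.
    pretrained_model_dir = 'internlm/internlm3-8b-instruct'
    quantized_model_dir = './internlm3-8b-instruct-gptq'
    quantize_config = BaseQuantizeConfig(bits=4, group_size=128)

    # AutoGPTQ loads the unquantized model on CPU by default; the patch
    # appends .cuda() so the model sits on the GPU before quantization.
    model = AutoGPTQForCausalLM.from_pretrained(pretrained_model_dir,
                                                quantize_config,
                                                torch_dtype=torch.float16,
                                                trust_remote_code=True).cuda()

    # Calibration examples must be a list of dicts whose only keys are
    # "input_ids" and "attention_mask" (see the comment in the hunk).
    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir,
                                              trust_remote_code=True)
    examples = [
        tokenizer('lmdeploy is a toolkit for compressing and serving LLMs.',
                  return_tensors='pt')
    ]

    model.quantize(examples)
    model.save_quantized(quantized_model_dir)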