From be8fc0f0eb9083f39a972b3b88d52361a28b354c Mon Sep 17 00:00:00 2001
From: AllentDan
Date: Tue, 14 Jan 2025 16:33:07 +0800
Subject: [PATCH] use cuda() for internlm3 gptq

---
 lmdeploy/lite/apis/gptq.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lmdeploy/lite/apis/gptq.py b/lmdeploy/lite/apis/gptq.py
index 0e67913b4..658be4c36 100644
--- a/lmdeploy/lite/apis/gptq.py
+++ b/lmdeploy/lite/apis/gptq.py
@@ -98,7 +98,7 @@ def auto_gptq(model: str,
                                                 quantize_config,
                                                 revision=revision,
                                                 torch_dtype=torch_dtype,
-                                                trust_remote_code=True)
+                                                trust_remote_code=True).cuda()
 
     # quantize model, the examples should be list of dict whose keys
     # can only be "input_ids" and "attention_mask"
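
For context, below is a minimal sketch of how the patched call is typically used with AutoGPTQ. The model path, quantize config values, calibration text, and the quantize/save steps are illustrative assumptions based on AutoGPTQ's documented API, not taken verbatim from lmdeploy/lite/apis/gptq.py; only the trailing .cuda() reflects this patch.

    import torch
    from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
    from transformers import AutoTokenizer

    # Hypothetical paths and config, for illustration only.
    pretrained_model_dir = 'internlm/internlm3-8b-instruct'
    quantized_model_dir = './internlm3-8b-instruct-gptq'
    quantize_config = BaseQuantizeConfig(bits=4, group_size=128)

    # AutoGPTQ loads the unquantized model on CPU by default; the patch
    # appends .cuda() so the model sits on the GPU before quantization.
    model = AutoGPTQForCausalLM.from_pretrained(pretrained_model_dir,
                                                quantize_config,
                                                torch_dtype=torch.float16,
                                                trust_remote_code=True).cuda()

    # Calibration examples must be a list of dicts whose only keys are
    # "input_ids" and "attention_mask" (see the comment in the hunk).
    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir,
                                              trust_remote_code=True)
    examples = [
        tokenizer('lmdeploy is a toolkit for compressing and serving LLMs.',
                  return_tensors='pt')
    ]

    model.quantize(examples)
    model.save_quantized(quantized_model_dir)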