We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 50fb8a9 · commit 09b592b — Copy full SHA for 09b592b
examples/models/llama/source_transformation/quantize.py
@@ -14,8 +14,6 @@
14
import torch.nn as nn
15
import torch.nn.functional as F
16
17
-from executorch.backends.vulkan._passes import VkInt4WeightOnlyQuantizer
18
-
19
from executorch.extension.llm.export.builder import DType
20
21
from sentencepiece import SentencePieceProcessor
@@ -180,6 +178,8 @@ def quantize( # noqa C901
180
178
model = gptq_quantizer.quantize(model, inputs)
181
179
return model
182
elif qmode == "vulkan_4w":
+ from executorch.backends.vulkan._passes import VkInt4WeightOnlyQuantizer
+
183
q_group_size = 256 if group_size is None else group_size
184
model = VkInt4WeightOnlyQuantizer(groupsize=q_group_size).quantize(model)
185
0 commit comments