Skip to content

Commit 09b592b

Browse files
authored
Move import of VK 4-bit source quantizer into function
Differential Revision: D70268708 Pull Request resolved: #8744
1 parent 50fb8a9 commit 09b592b

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

examples/models/llama/source_transformation/quantize.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@
1414
import torch.nn as nn
1515
import torch.nn.functional as F
1616

17-
from executorch.backends.vulkan._passes import VkInt4WeightOnlyQuantizer
18-
1917
from executorch.extension.llm.export.builder import DType
2018

2119
from sentencepiece import SentencePieceProcessor
@@ -180,6 +178,8 @@ def quantize( # noqa C901
180178
model = gptq_quantizer.quantize(model, inputs)
181179
return model
182180
elif qmode == "vulkan_4w":
181+
from executorch.backends.vulkan._passes import VkInt4WeightOnlyQuantizer
182+
183183
q_group_size = 256 if group_size is None else group_size
184184
model = VkInt4WeightOnlyQuantizer(groupsize=q_group_size).quantize(model)
185185

0 commit comments

Comments (0)