
Commit 115dee1

Merge branch 'k_quant' of https://github.com/jiafatom/neural-compressor into k_quant

2 parents c3318cf + 6015feb

File tree

1 file changed: +1 −3

neural_compressor/adaptor/ox_utils/weight_only.py

+1 −3
```diff
@@ -535,9 +535,7 @@ def rtn_quantize(
         # MatMulFpQ4 support 4 bits and 32 group_size with ort 1.16.0 and 1.16.1 versions, supported by CPU EP
         # MatMulNBits supports 4 bits and 2^n group_size with ort > 1.16.1, supported by CPU EP AND CUDA EP
         if algorithm == "k_quant":
-            q_weight, scale, zp = quant_tensor_k_quant_cuda(
-                weight.T, num_bits, group_size
-            )
+            q_weight, scale, zp = quant_tensor_k_quant_cuda(weight.T, num_bits, group_size)
         else:
             q_weight, scale, zp = quant_tensor(
                 weight.T, num_bits, group_size, scheme, "uint", ratios.get(node.input[1], 1)
```
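For context, both `quant_tensor` and `quant_tensor_k_quant_cuda` in this hunk return a `(q_weight, scale, zp)` triple for group-wise weight-only quantization of `weight.T`. Below is a minimal sketch of a plain round-to-nearest (RTN) unsigned quantizer with the same return shape, assuming NumPy and a K dimension divisible by `group_size`; the name `rtn_quant_sketch` and its internals are illustrative assumptions, not the neural-compressor implementation (in particular, the `k_quant` path dispatches to a CUDA routine and may pick scales differently).

```python
# Illustrative sketch only; not the neural-compressor quant_tensor implementation.
import numpy as np

def rtn_quant_sketch(weight: np.ndarray, num_bits: int = 4, group_size: int = 32):
    """Group-wise RTN unsigned quantization of `weight` (shape [K, N]) along K."""
    k, n = weight.shape
    assert k % group_size == 0, "sketch assumes K is a multiple of group_size"
    w = weight.reshape(k // group_size, group_size, n)

    w_min = w.min(axis=1, keepdims=True)       # per-group minimum, shape (K/g, 1, N)
    w_max = w.max(axis=1, keepdims=True)       # per-group maximum
    q_max = 2**num_bits - 1                    # 15 for 4-bit unsigned

    scale = (w_max - w_min) / q_max            # per-group step size
    scale = np.where(scale == 0, 1e-9, scale)  # guard against constant groups
    zp = np.round(-w_min / scale)              # per-group zero point

    q_weight = np.clip(np.round(w / scale) + zp, 0, q_max).astype(np.uint8)
    return q_weight.reshape(k, n), scale, zp

# Example: quantize a 128x64 weight to 4 bits with 32-element groups.
q, s, z = rtn_quant_sketch(np.random.randn(128, 64).astype(np.float32))
```

The 4-bit, power-of-two `group_size` defaults mirror the MatMulFpQ4 / MatMulNBits constraints noted in the comments of the diff above.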
