Skip to content

Commit 97d75dc

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 932b214 commit 97d75dc

File tree

1 file changed

+22
-22
lines changed

1 file changed

+22
-22
lines changed

neural_compressor/adaptor/ox_utils/weight_only.py

+22-22
Original file line number | Diff line number | Diff line change
@@ -261,44 +261,44 @@ def quant_tensor_k_quant_cpu(data, num_bits=4, group_size=32):
261261
scale: scale
262262
zero_point: zero point
263263
"""
264-
data = np.reshape(data, (-1, group_size)).astype(np.float32) # nb = data.shape[0], (nb, group_size)
264+
data = np.reshape(data, (-1, group_size)).astype(np.float32) # nb = data.shape[0], (nb, group_size)
265265
maxq = 2**num_bits - 1
266266
minq = 0
267-
sum_x2 = np.sum(data**2, axis=1, keepdims=True) # (nb, 1)
268-
av_x = np.sqrt(sum_x2 / group_size) # (nb, 1)
269-
weights = np.add(av_x, np.abs(data)) # (nb, group_size)
270-
rmin = np.min(data, axis=1, keepdims=True) # (nb, 1)
271-
rmax = np.max(data, axis=1, keepdims=True) # (nb, 1)
272-
sum_w = np.sum(weights, axis=1, keepdims=True) # (nb, 1)
273-
sum_x = np.sum(weights * data, axis=1, keepdims=True) # (nb, group_size)
274-
iscale = np.ones(rmax.shape, dtype=data.dtype) # (nb, 1)
267+
sum_x2 = np.sum(data**2, axis=1, keepdims=True) # (nb, 1)
268+
av_x = np.sqrt(sum_x2 / group_size) # (nb, 1)
269+
weights = np.add(av_x, np.abs(data)) # (nb, group_size)
270+
rmin = np.min(data, axis=1, keepdims=True) # (nb, 1)
271+
rmax = np.max(data, axis=1, keepdims=True) # (nb, 1)
272+
sum_w = np.sum(weights, axis=1, keepdims=True) # (nb, 1)
273+
sum_x = np.sum(weights * data, axis=1, keepdims=True) # (nb, group_size)
274+
iscale = np.ones(rmax.shape, dtype=data.dtype) # (nb, 1)
275275
mask = rmin != rmax
276276
iscale[mask] = (maxq - minq) / (rmax[mask] - rmin[mask])
277277
scale = 1 / iscale
278-
quant_data = np.clip(np.round(iscale * (data - rmin)), minq, maxq) # (nb, group_size)
279-
diff = scale * quant_data + rmin - data # (nb, group_size)
280-
best_mad = np.sum(weights * diff ** 2, axis=1, keepdims=True) # (nb, 1)
278+
quant_data = np.clip(np.round(iscale * (data - rmin)), minq, maxq) # (nb, group_size)
279+
diff = scale * quant_data + rmin - data # (nb, group_size)
280+
best_mad = np.sum(weights * diff**2, axis=1, keepdims=True) # (nb, 1)
281281
nstep = 20
282282
rdelta = 0.1
283283
# nstep * rdelta = -2 * rrmin, maxq - minq = 2**num_bits - 1
284284
rrmin = -1
285285
for is_ in range(nstep):
286-
iscale_new = np.ones(rmax.shape, dtype=data.dtype) # (nb, 1)
286+
iscale_new = np.ones(rmax.shape, dtype=data.dtype) # (nb, 1)
287287
factor = np.array([rrmin + rdelta * is_ + maxq - minq]).astype(data.dtype)[0]
288288
mask = rmin != rmax
289289
iscale_new[mask] = factor / (rmax[mask] - rmin[mask])
290-
quant_data_new = np.clip(np.round(iscale_new * (data - rmin)), minq, maxq) # (nb, group_size)
290+
quant_data_new = np.clip(np.round(iscale_new * (data - rmin)), minq, maxq) # (nb, group_size)
291291
mul_weights_quant_data_new = weights * quant_data_new
292-
sum_l = np.sum(mul_weights_quant_data_new, axis=1, keepdims=True) # (nb, 1)
293-
sum_l2 = np.sum(mul_weights_quant_data_new * quant_data_new, axis=1, keepdims=True) # (nb, 1)
294-
sum_xl = np.sum(mul_weights_quant_data_new * data, axis=1, keepdims=True) # (nb, 1)
295-
D = np.subtract(sum_w * sum_l2, sum_l ** 2) # (nb, 1)
292+
sum_l = np.sum(mul_weights_quant_data_new, axis=1, keepdims=True) # (nb, 1)
293+
sum_l2 = np.sum(mul_weights_quant_data_new * quant_data_new, axis=1, keepdims=True) # (nb, 1)
294+
sum_xl = np.sum(mul_weights_quant_data_new * data, axis=1, keepdims=True) # (nb, 1)
295+
D = np.subtract(sum_w * sum_l2, sum_l**2) # (nb, 1)
296296

297-
this_scale = (sum_w * sum_xl - sum_x * sum_l) / D # (nb, 1)
298-
this_min = (sum_l2 * sum_x - sum_l * sum_xl) / D # (nb, 1)
297+
this_scale = (sum_w * sum_xl - sum_x * sum_l) / D # (nb, 1)
298+
this_min = (sum_l2 * sum_x - sum_l * sum_xl) / D # (nb, 1)
299299

300-
diff = this_scale * quant_data_new + this_min - data # (nb, group_size)
301-
mad = np.sum(weights * diff ** 2, axis=1, keepdims=True) # (nb, 1)
300+
diff = this_scale * quant_data_new + this_min - data # (nb, group_size)
301+
mad = np.sum(weights * diff**2, axis=1, keepdims=True) # (nb, 1)
302302

303303
mad_1 = np.array(mad)
304304
best_mad_1 = np.array(best_mad)

0 commit comments

Comments
 (0)